diff options
| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Shader | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) | |
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Shader')
174 files changed, 36779 insertions, 0 deletions
// File: src/Ryujinx.Graphics.Shader/AlphaTestOp.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Alpha test operations.
    /// Numbering starts at 1 and increases by one in declaration order.
    /// </summary>
    public enum AlphaTestOp
    {
        Never = 1,
        Less = 2,
        Equal = 3,
        LessOrEqual = 4,
        Greater = 5,
        NotEqual = 6,
        GreaterOrEqual = 7,
        Always = 8
    }
}
// File: src/Ryujinx.Graphics.Shader/AttributeType.cs
using Ryujinx.Graphics.Shader.Translation;
using System;

namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Component type of an attribute, stored as a single byte.
    /// </summary>
    public enum AttributeType : byte
    {
        // Generic types.
        Float,
        Sint,
        Uint
    }

    /// <summary>
    /// Conversions from <see cref="AttributeType"/> to other type representations.
    /// </summary>
    static class AttributeTypeExtensions
    {
        /// <summary>
        /// Gets the name of the GLSL 4-component vector type matching the attribute type.
        /// </summary>
        /// <param name="type">Attribute type to convert</param>
        /// <returns>"vec4", "ivec4" or "uvec4"</returns>
        /// <exception cref="ArgumentException">The value is not one of the declared members</exception>
        public static string ToVec4Type(this AttributeType type)
        {
            switch (type)
            {
                case AttributeType.Float:
                    return "vec4";
                case AttributeType.Sint:
                    return "ivec4";
                case AttributeType.Uint:
                    return "uvec4";
                default:
                    throw new ArgumentException($"Invalid attribute type \"{type}\".");
            }
        }

        /// <summary>
        /// Gets the scalar <see cref="AggregateType"/> matching the attribute type.
        /// </summary>
        /// <param name="type">Attribute type to convert</param>
        /// <returns>FP32, S32 or U32</returns>
        /// <exception cref="ArgumentException">The value is not one of the declared members</exception>
        public static AggregateType ToAggregateType(this AttributeType type)
        {
            switch (type)
            {
                case AttributeType.Float:
                    return AggregateType.FP32;
                case AttributeType.Sint:
                    return AggregateType.S32;
                case AttributeType.Uint:
                    return AggregateType.U32;
                default:
                    throw new ArgumentException($"Invalid attribute type \"{type}\".");
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/BufferDescriptor.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Describes a buffer by its binding, slot and usage flags.
    /// </summary>
    public struct BufferDescriptor
    {
        // New fields should be added to the end of the struct to keep disk shader cache compatibility.

        public readonly int Binding;
        public readonly int Slot;
        public BufferUsageFlags Flags;

        /// <summary>
        /// Creates a descriptor with no usage flags set.
        /// </summary>
        /// <param name="binding">Value stored in <see cref="Binding"/></param>
        /// <param name="slot">Value stored in <see cref="Slot"/></param>
        public BufferDescriptor(int binding, int slot)
        {
            (Binding, Slot, Flags) = (binding, slot, BufferUsageFlags.None);
        }

        /// <summary>
        /// Adds a usage flag to this descriptor.
        /// </summary>
        /// <param name="flag">Flag (or flags) to OR into <see cref="Flags"/></param>
        /// <returns>A copy of this descriptor taken after the flag was added</returns>
        public BufferDescriptor SetFlag(BufferUsageFlags flag)
        {
            Flags = Flags | flag;

            return this;
        }
    }
}
// This range holds two files from the diff. The using directives of both files are
// listed first so that the two namespace blocks below form one valid compilation unit.
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Text;

// File: src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Flags that indicate how a buffer will be used in a shader.
    /// </summary>
    [Flags]
    public enum BufferUsageFlags
    {
        None = 0,

        /// <summary>
        /// Buffer is written to.
        /// </summary>
        Write = 1 << 0
    }
}

// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Accumulates generated source text and tracks the current scope indentation.
    /// </summary>
    class CodeGenContext
    {
        // NOTE(review): whitespace in this listing appears to be collapsed, so the real
        // width of this literal may be larger than one space - confirm against the repository.
        public const string Tab = " ";

        public StructuredFunction CurrentFunction { get; set; }

        public StructuredProgramInfo Info { get; }

        public ShaderConfig Config { get; }

        public OperandManager OperandManager { get; }

        private readonly StringBuilder _sb;

        private int _level;

        private string _indentation;

        /// <summary>
        /// Creates a context with an empty output buffer at indentation level zero.
        /// </summary>
        /// <param name="info">Value exposed through <see cref="Info"/></param>
        /// <param name="config">Value exposed through <see cref="Config"/></param>
        public CodeGenContext(StructuredProgramInfo info, ShaderConfig config)
        {
            Info = info;
            Config = config;

            OperandManager = new OperandManager();

            _sb = new StringBuilder();

            // Level zero has no indentation; set explicitly rather than relying on null concatenation.
            _indentation = string.Empty;
        }

        /// <summary>
        /// Appends an empty line (no indentation is written).
        /// </summary>
        public void AppendLine()
        {
            _sb.AppendLine();
        }

        /// <summary>
        /// Appends the current indentation followed by the text and a line terminator.
        /// </summary>
        /// <param name="str">Text to append; only its start is indented, embedded line breaks are written as-is</param>
        public void AppendLine(string str)
        {
            // Appending the two parts separately avoids allocating a temporary concatenated string per line.
            _sb.Append(_indentation).AppendLine(str);
        }

        /// <summary>
        /// Gets everything appended so far.
        /// </summary>
        /// <returns>The accumulated text</returns>
        public string GetCode()
        {
            return _sb.ToString();
        }

        /// <summary>
        /// Writes an opening brace at the current indentation, then increases the indentation level.
        /// </summary>
        public void EnterScope()
        {
            AppendLine("{");

            _level++;

            UpdateIndentation();
        }

        /// <summary>
        /// Decreases the indentation level, then writes a closing brace followed by the suffix.
        /// Does nothing when no scope is open.
        /// </summary>
        /// <param name="suffix">Text written immediately after the closing brace</param>
        public void LeaveScope(string suffix = "")
        {
            if (_level == 0)
            {
                return;
            }

            _level--;

            UpdateIndentation();

            AppendLine("}" + suffix);
        }

        /// <summary>
        /// Gets a function of the program by index.
        /// </summary>
        /// <param name="id">Index into <c>Info.Functions</c></param>
        /// <returns>The function stored at that index</returns>
        public StructuredFunction GetFunction(int id)
        {
            return Info.Functions[id];
        }

        private void UpdateIndentation()
        {
            _indentation = GetIndentation(_level);
        }

        /// <summary>
        /// Builds the indentation string for a nesting level.
        /// </summary>
        /// <param name="level">Number of <see cref="Tab"/> repetitions</param>
        /// <returns><see cref="Tab"/> repeated <paramref name="level"/> times; empty when the level is not positive</returns>
        private static string GetIndentation(int level)
        {
            if (level <= 0)
            {
                return string.Empty;
            }

            // Single pre-sized buffer instead of growing a string with one concatenation per level.
            return new StringBuilder(Tab.Length * level).Insert(0, Tab, level).ToString();
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Emits the declaration part of a generated GLSL shader: version and extension
    /// directives, memory arrays, buffers, samplers, images, attributes, the support
    /// uniform block and embedded helper functions.
    /// </summary>
    static class Declarations
    {
        /// <summary>
        /// Helper function resources, in the order they are appended to the shader when
        /// the matching bit is set on <c>StructuredProgramInfo.HelperFunctionsMask</c>.
        /// </summary>
        private static readonly (HelperFunctionsMask Mask, string FileName)[] _helperFunctionSources =
        {
            (HelperFunctionsMask.AtomicMinMaxS32Shared, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl"),
            (HelperFunctionsMask.AtomicMinMaxS32Storage, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl"),
            (HelperFunctionsMask.MultiplyHighS32, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"),
            (HelperFunctionsMask.MultiplyHighU32, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl"),
            (HelperFunctionsMask.Shuffle, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl"),
            (HelperFunctionsMask.ShuffleDown, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl"),
            (HelperFunctionsMask.ShuffleUp, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl"),
            (HelperFunctionsMask.ShuffleXor, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl"),
            (HelperFunctionsMask.StoreSharedSmallInt, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl"),
            (HelperFunctionsMask.StoreStorageSmallInt, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl"),
            (HelperFunctionsMask.SwizzleAdd, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl")
        };

        /// <summary>
        /// Writes all global declarations of the shader to the context.
        /// </summary>
        /// <param name="context">Code generation context that receives the output and supplies the shader configuration</param>
        /// <param name="info">Structured program information (attribute declarations and helper function mask)</param>
        public static void Declare(CodeGenContext context, StructuredProgramInfo info)
        {
            context.AppendLine(context.Config.Options.TargetApi == TargetApi.Vulkan ? "#version 460 core" : "#version 450 core");
            context.AppendLine("#extension GL_ARB_gpu_shader_int64 : enable");

            if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
            {
                context.AppendLine("#extension GL_ARB_shader_ballot : enable");
            }
            else
            {
                context.AppendLine("#extension GL_KHR_shader_subgroup_basic : enable");
                context.AppendLine("#extension GL_KHR_shader_subgroup_ballot : enable");
            }

            context.AppendLine("#extension GL_ARB_shader_group_vote : enable");
            context.AppendLine("#extension GL_EXT_shader_image_load_formatted : enable");
            context.AppendLine("#extension GL_EXT_texture_shadow_lod : enable");

            if (context.Config.Stage == ShaderStage.Compute)
            {
                context.AppendLine("#extension GL_ARB_compute_shader : enable");
            }
            else if (context.Config.Stage == ShaderStage.Fragment)
            {
                if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
                {
                    context.AppendLine("#extension GL_ARB_fragment_shader_interlock : enable");
                }
                else if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel())
                {
                    context.AppendLine("#extension GL_INTEL_fragment_shader_ordering : enable");
                }
            }
            else
            {
                if (context.Config.Stage == ShaderStage.Vertex)
                {
                    context.AppendLine("#extension GL_ARB_shader_draw_parameters : enable");
                }

                context.AppendLine("#extension GL_ARB_shader_viewport_layer_array : enable");
            }

            if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
            {
                context.AppendLine("#extension GL_NV_geometry_shader_passthrough : enable");
            }

            if (context.Config.GpuAccessor.QueryHostSupportsViewportMask())
            {
                context.AppendLine("#extension GL_NV_viewport_array2 : enable");
            }

            context.AppendLine("#pragma optionNV(fastmath off)");
            context.AppendLine();

            context.AppendLine($"const int {DefaultNames.UndefinedName} = 0;");
            context.AppendLine();

            if (context.Config.Stage == ShaderStage.Compute)
            {
                // Sizes are divided by 4 (rounded up) because the arrays are declared with uint elements.
                int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4);

                if (localMemorySize != 0)
                {
                    string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);

                    context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
                    context.AppendLine();
                }

                int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4);

                if (sharedMemorySize != 0)
                {
                    string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize);

                    context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];");
                    context.AppendLine();
                }
            }
            else if (context.Config.LocalMemorySize != 0)
            {
                int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);

                string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);

                context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
                context.AppendLine();
            }

            var cBufferDescriptors = context.Config.GetConstantBufferDescriptors();
            if (cBufferDescriptors.Length != 0)
            {
                DeclareUniforms(context, cBufferDescriptors);

                context.AppendLine();
            }

            var sBufferDescriptors = context.Config.GetStorageBufferDescriptors();
            if (sBufferDescriptors.Length != 0)
            {
                DeclareStorages(context, sBufferDescriptors);

                context.AppendLine();
            }

            var textureDescriptors = context.Config.GetTextureDescriptors();
            if (textureDescriptors.Length != 0)
            {
                DeclareSamplers(context, textureDescriptors);

                context.AppendLine();
            }

            var imageDescriptors = context.Config.GetImageDescriptors();
            if (imageDescriptors.Length != 0)
            {
                DeclareImages(context, imageDescriptors);

                context.AppendLine();
            }

            if (context.Config.Stage != ShaderStage.Compute)
            {
                if (context.Config.Stage == ShaderStage.Geometry)
                {
                    InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology();
                    string inPrimitive = inputTopology.ToGlslString();

                    context.AppendLine($"layout (invocations = {context.Config.ThreadsPerInputPrimitive}, {inPrimitive}) in;");

                    if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
                    {
                        context.AppendLine("layout (passthrough) in gl_PerVertex");
                        context.EnterScope();
                        context.AppendLine("vec4 gl_Position;");
                        context.AppendLine("float gl_PointSize;");
                        context.AppendLine("float gl_ClipDistance[];");
                        context.LeaveScope(";");
                    }
                    else
                    {
                        string outPrimitive = context.Config.OutputTopology.ToGlslString();

                        int maxOutputVertices = context.Config.GpPassthrough
                            ? inputTopology.ToInputVertices()
                            : context.Config.MaxOutputVertices;

                        context.AppendLine($"layout ({outPrimitive}, max_vertices = {maxOutputVertices}) out;");
                    }

                    context.AppendLine();
                }
                else if (context.Config.Stage == ShaderStage.TessellationControl)
                {
                    int threadsPerInputPrimitive = context.Config.ThreadsPerInputPrimitive;

                    context.AppendLine($"layout (vertices = {threadsPerInputPrimitive}) out;");
                    context.AppendLine();
                }
                else if (context.Config.Stage == ShaderStage.TessellationEvaluation)
                {
                    bool tessCw = context.Config.GpuAccessor.QueryTessCw();

                    if (context.Config.Options.TargetApi == TargetApi.Vulkan)
                    {
                        // We invert the front face on Vulkan backend, so we need to do that here as well.
                        tessCw = !tessCw;
                    }

                    string patchType = context.Config.GpuAccessor.QueryTessPatchType().ToGlsl();
                    string spacing = context.Config.GpuAccessor.QueryTessSpacing().ToGlsl();
                    string windingOrder = tessCw ? "cw" : "ccw";

                    context.AppendLine($"layout ({patchType}, {spacing}, {windingOrder}) in;");
                    context.AppendLine();
                }

                if (context.Config.UsedInputAttributes != 0 || context.Config.GpPassthrough)
                {
                    DeclareInputAttributes(context, info);

                    context.AppendLine();
                }

                if (context.Config.UsedOutputAttributes != 0 || context.Config.Stage != ShaderStage.Fragment)
                {
                    DeclareOutputAttributes(context, info);

                    context.AppendLine();
                }

                if (context.Config.UsedInputAttributesPerPatch.Count != 0)
                {
                    DeclareInputAttributesPerPatch(context, context.Config.UsedInputAttributesPerPatch);

                    context.AppendLine();
                }

                if (context.Config.UsedOutputAttributesPerPatch.Count != 0)
                {
                    DeclareUsedOutputAttributesPerPatch(context, context.Config.UsedOutputAttributesPerPatch);

                    context.AppendLine();
                }

                if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline)
                {
                    var tfOutput = context.Config.GetTransformFeedbackOutput(AttributeConsts.PositionX);
                    if (tfOutput.Valid)
                    {
                        // GetTfLayout already ends with a space, so the result is
                        // "layout (xfb_buffer = ..., xfb_offset = ..., xfb_stride = ...) out gl_PerVertex".
                        context.AppendLine($"{GetTfLayout(tfOutput)}out gl_PerVertex");
                        context.EnterScope();
                        context.AppendLine("vec4 gl_Position;");
                        context.LeaveScope(context.Config.Stage == ShaderStage.TessellationControl ? " gl_out[];" : ";");
                    }
                }
            }
            else
            {
                string localSizeX = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeX());
                string localSizeY = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeY());
                string localSizeZ = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeZ());

                context.AppendLine(
                    "layout (" +
                    $"local_size_x = {localSizeX}, " +
                    $"local_size_y = {localSizeY}, " +
                    $"local_size_z = {localSizeZ}) in;");
                context.AppendLine();
            }

            bool isFragment = context.Config.Stage == ShaderStage.Fragment;

            if (isFragment || context.Config.Stage == ShaderStage.Compute || context.Config.Stage == ShaderStage.Vertex)
            {
                if (isFragment && context.Config.GpuAccessor.QueryEarlyZForce())
                {
                    context.AppendLine("layout(early_fragment_tests) in;");
                    context.AppendLine();
                }

                if ((context.Config.UsedFeatures & (FeatureFlags.FragCoordXY | FeatureFlags.IntegerSampling)) != 0)
                {
                    string stage = OperandManager.GetShaderStagePrefix(context.Config.Stage);

                    int scaleElements = context.Config.GetTextureDescriptors().Length + context.Config.GetImageDescriptors().Length;

                    if (isFragment)
                    {
                        scaleElements++; // Also includes render target scale, for gl_FragCoord.
                    }

                    DeclareSupportUniformBlock(context, context.Config.Stage, scaleElements);

                    if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IntegerSampling) && scaleElements != 0)
                    {
                        AppendHelperFunction(context, $"Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_{stage}.glsl");
                        context.AppendLine();
                    }
                }
                else if (isFragment || context.Config.Stage == ShaderStage.Vertex)
                {
                    DeclareSupportUniformBlock(context, context.Config.Stage, 0);
                }
            }

            DeclareHelperFunctions(context, info);
        }

        /// <summary>
        /// Appends every helper function whose bit is set on the program's helper function mask,
        /// following the order of <see cref="_helperFunctionSources"/>.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="info">Structured program information holding the helper function mask</param>
        private static void DeclareHelperFunctions(CodeGenContext context, StructuredProgramInfo info)
        {
            foreach ((HelperFunctionsMask mask, string fileName) in _helperFunctionSources)
            {
                if ((info.HelperFunctionsMask & mask) != 0)
                {
                    AppendHelperFunction(context, fileName);
                }
            }
        }

        /// <summary>
        /// Builds the transform feedback layout qualifier for an output.
        /// </summary>
        /// <param name="tfOutput">Transform feedback output description</param>
        /// <returns>The layout qualifier followed by a space when the output is valid, otherwise an empty string</returns>
        private static string GetTfLayout(TransformFeedbackOutput tfOutput)
        {
            if (tfOutput.Valid)
            {
                return $"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) ";
            }

            return string.Empty;
        }

        /// <summary>
        /// Declares every local variable of a function, one per line.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="function">Function whose locals are declared</param>
        public static void DeclareLocals(CodeGenContext context, StructuredFunction function)
        {
            foreach (AstOperand decl in function.Locals)
            {
                string name = context.OperandManager.DeclareLocal(decl);

                context.AppendLine(GetVarTypeName(context, decl.VarType) + " " + name + ";");
            }
        }

        /// <summary>
        /// Gets the GLSL type name for an aggregate type.
        /// </summary>
        /// <param name="context">Code generation context, used to query whether the host wants reduced precision</param>
        /// <param name="type">Type to convert</param>
        /// <param name="precise">True to prefix FP32 types with "precise"; ignored when the host reports reduced precision</param>
        /// <returns>GLSL type name</returns>
        /// <exception cref="ArgumentException">The type has no mapping</exception>
        public static string GetVarTypeName(CodeGenContext context, AggregateType type, bool precise = true)
        {
            if (context.Config.GpuAccessor.QueryHostReducedPrecision())
            {
                precise = false;
            }

            return type switch
            {
                AggregateType.Void => "void",
                AggregateType.Bool => "bool",
                AggregateType.FP32 => precise ? "precise float" : "float",
                AggregateType.FP64 => "double",
                AggregateType.S32 => "int",
                AggregateType.U32 => "uint",
                AggregateType.Vector2 | AggregateType.Bool => "bvec2",
                AggregateType.Vector2 | AggregateType.FP32 => precise ? "precise vec2" : "vec2",
                AggregateType.Vector2 | AggregateType.FP64 => "dvec2",
                AggregateType.Vector2 | AggregateType.S32 => "ivec2",
                AggregateType.Vector2 | AggregateType.U32 => "uvec2",
                AggregateType.Vector3 | AggregateType.Bool => "bvec3",
                AggregateType.Vector3 | AggregateType.FP32 => precise ? "precise vec3" : "vec3",
                AggregateType.Vector3 | AggregateType.FP64 => "dvec3",
                AggregateType.Vector3 | AggregateType.S32 => "ivec3",
                AggregateType.Vector3 | AggregateType.U32 => "uvec3",
                AggregateType.Vector4 | AggregateType.Bool => "bvec4",
                AggregateType.Vector4 | AggregateType.FP32 => precise ? "precise vec4" : "vec4",
                AggregateType.Vector4 | AggregateType.FP64 => "dvec4",
                AggregateType.Vector4 | AggregateType.S32 => "ivec4",
                AggregateType.Vector4 | AggregateType.U32 => "uvec4",
                _ => throw new ArgumentException($"Invalid variable type \"{type}\".")
            };
        }

        /// <summary>
        /// Declares the constant buffers as std140 uniform blocks: a single block array when
        /// constant buffer indexing is used, otherwise one block per descriptor.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="descriptors">Constant buffer descriptors; the caller passes a non-empty array</param>
        private static void DeclareUniforms(CodeGenContext context, BufferDescriptor[] descriptors)
        {
            // The block member is a vec4 array, so the element count is the buffer size divided by 16.
            string ubSize = "[" + NumberFormatter.FormatInt(Constants.ConstantBufferSize / 16) + "]";

            if (context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing))
            {
                string ubName = OperandManager.GetShaderStagePrefix(context.Config.Stage);

                ubName += "_" + DefaultNames.UniformNamePrefix;

                string blockName = $"{ubName}_{DefaultNames.BlockSuffix}";

                context.AppendLine($"layout (binding = {context.Config.FirstConstantBufferBinding}, std140) uniform {blockName}");
                context.EnterScope();
                context.AppendLine("vec4 " + DefaultNames.DataName + ubSize + ";");
                context.LeaveScope($" {ubName}[{NumberFormatter.FormatInt(descriptors.Max(x => x.Slot) + 1)}];");
            }
            else
            {
                foreach (var descriptor in descriptors)
                {
                    string ubName = OperandManager.GetShaderStagePrefix(context.Config.Stage);

                    ubName += "_" + DefaultNames.UniformNamePrefix + descriptor.Slot;

                    context.AppendLine($"layout (binding = {descriptor.Binding}, std140) uniform {ubName}");
                    context.EnterScope();
                    context.AppendLine("vec4 " + OperandManager.GetUbName(context.Config.Stage, descriptor.Slot, false) + ubSize + ";");
                    context.LeaveScope(";");
                }
            }
        }

        /// <summary>
        /// Declares the storage buffers as a single std430 buffer block array sized by the highest used slot.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="descriptors">Storage buffer descriptors; the caller passes a non-empty array</param>
        private static void DeclareStorages(CodeGenContext context, BufferDescriptor[] descriptors)
        {
            string sbName = OperandManager.GetShaderStagePrefix(context.Config.Stage);

            sbName += "_" + DefaultNames.StorageNamePrefix;

            string blockName = $"{sbName}_{DefaultNames.BlockSuffix}";

            string layout = context.Config.Options.TargetApi == TargetApi.Vulkan ? ", set = 1" : string.Empty;

            context.AppendLine($"layout (binding = {context.Config.FirstStorageBufferBinding}{layout}, std430) buffer {blockName}");
            context.EnterScope();
            context.AppendLine("uint " + DefaultNames.DataName + "[];");
            context.LeaveScope($" {sbName}[{NumberFormatter.FormatInt(descriptors.Max(x => x.Slot) + 1)}];");
        }

        /// <summary>
        /// Declares one sampler uniform per texture descriptor. For descriptors flagged as indexed,
        /// a countdown starting at <c>ShaderConfig.SamplerArraySize</c> decides which of them are skipped.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="descriptors">Texture descriptors to declare</param>
        private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
        {
            int arraySize = 0;
            foreach (var descriptor in descriptors)
            {
                if (descriptor.Type.HasFlag(SamplerType.Indexed))
                {
                    if (arraySize == 0)
                    {
                        arraySize = ShaderConfig.SamplerArraySize;
                    }
                    else if (--arraySize != 0)
                    {
                        continue;
                    }
                }

                string indexExpr = NumberFormatter.FormatInt(arraySize);

                string samplerName = OperandManager.GetSamplerName(
                    context.Config.Stage,
                    descriptor.CbufSlot,
                    descriptor.HandleIndex,
                    descriptor.Type.HasFlag(SamplerType.Indexed),
                    indexExpr);

                string samplerTypeName = descriptor.Type.ToGlslSamplerType();

                string layout = string.Empty;

                if (context.Config.Options.TargetApi == TargetApi.Vulkan)
                {
                    layout = ", set = 2";
                }

                context.AppendLine($"layout (binding = {descriptor.Binding}{layout}) uniform {samplerTypeName} {samplerName};");
            }
        }

        /// <summary>
        /// Declares one image uniform per image descriptor, including the format qualifier (when
        /// there is one) and the "coherent" qualifier for descriptors flagged as coherent.
        /// Indexed descriptors are handled with the same countdown as in <see cref="DeclareSamplers"/>.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="descriptors">Image descriptors to declare</param>
        private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors)
        {
            int arraySize = 0;
            foreach (var descriptor in descriptors)
            {
                if (descriptor.Type.HasFlag(SamplerType.Indexed))
                {
                    if (arraySize == 0)
                    {
                        arraySize = ShaderConfig.SamplerArraySize;
                    }
                    else if (--arraySize != 0)
                    {
                        continue;
                    }
                }

                string indexExpr = NumberFormatter.FormatInt(arraySize);

                string imageName = OperandManager.GetImageName(
                    context.Config.Stage,
                    descriptor.CbufSlot,
                    descriptor.HandleIndex,
                    descriptor.Format,
                    descriptor.Type.HasFlag(SamplerType.Indexed),
                    indexExpr);

                string imageTypeName = descriptor.Type.ToGlslImageType(descriptor.Format.GetComponentType());

                if (descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent))
                {
                    imageTypeName = "coherent " + imageTypeName;
                }

                string layout = descriptor.Format.ToGlslFormat();

                if (!string.IsNullOrEmpty(layout))
                {
                    layout = ", " + layout;
                }

                if (context.Config.Options.TargetApi == TargetApi.Vulkan)
                {
                    layout = $", set = 3{layout}";
                }

                context.AppendLine($"layout (binding = {descriptor.Binding}{layout}) uniform {imageTypeName} {imageName};");
            }
        }

        /// <summary>
        /// Declares the input attributes: a single vec4 array when input attribute indexing is used,
        /// otherwise one declaration per bit set in the used or passthrough attribute masks.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="info">Structured program information, forwarded to the per-attribute declaration</param>
        private static void DeclareInputAttributes(CodeGenContext context, StructuredProgramInfo info)
        {
            if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IaIndexing))
            {
                string suffix = context.Config.Stage == ShaderStage.Geometry ? "[]" : string.Empty;

                context.AppendLine($"layout (location = 0) in vec4 {DefaultNames.IAttributePrefix}{suffix}[{Constants.MaxAttributes}];");
            }
            else
            {
                int usedAttributes = context.Config.UsedInputAttributes | context.Config.PassthroughAttributes;
                while (usedAttributes != 0)
                {
                    // Visit set bits from lowest to highest, clearing each one after it is declared.
                    int index = BitOperations.TrailingZeroCount(usedAttributes);
                    DeclareInputAttribute(context, info, index);
                    usedAttributes &= ~(1 << index);
                }
            }
        }

        /// <summary>
        /// Declares the per-patch input attributes in ascending order.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="attrs">Per-patch attribute indices</param>
        private static void DeclareInputAttributesPerPatch(CodeGenContext context, HashSet<int> attrs)
        {
            foreach (int attr in attrs.Order())
            {
                DeclareInputAttributePerPatch(context, attr);
            }
        }

        /// <summary>
        /// Declares a single input attribute. On the fragment stage with transform feedback enabled
        /// the attribute is split into a vector plus individual float components; otherwise it is
        /// declared as one 4-component vector.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="info">Structured program information (not read by this method)</param>
        /// <param name="attr">Attribute index, also used as the layout location</param>
        private static void DeclareInputAttribute(CodeGenContext context, StructuredProgramInfo info, int attr)
        {
            string suffix = IsArrayAttributeGlsl(context.Config.Stage, isOutAttr: false) ? "[]" : string.Empty;
            string iq = string.Empty;

            if (context.Config.Stage == ShaderStage.Fragment)
            {
                // Interpolation qualifier, chosen from the first used interpolation mode of the attribute.
                iq = context.Config.ImapTypes[attr].GetFirstUsedType() switch
                {
                    PixelImap.Constant => "flat ",
                    PixelImap.ScreenLinear => "noperspective ",
                    _ => string.Empty
                };
            }

            string name = $"{DefaultNames.IAttributePrefix}{attr}";

            if (context.Config.TransformFeedbackEnabled && context.Config.Stage == ShaderStage.Fragment)
            {
                int components = context.Config.GetTransformFeedbackOutputComponents(attr, 0);

                if (components > 1)
                {
                    string type = components switch
                    {
                        2 => "vec2",
                        3 => "vec3",
                        4 => "vec4",
                        _ => "float"
                    };

                    context.AppendLine($"layout (location = {attr}) in {type} {name};");
                }

                // Components not covered by the vector above are declared as separate floats.
                for (int c = components > 1 ? components : 0; c < 4; c++)
                {
                    char swzMask = "xyzw"[c];

                    context.AppendLine($"layout (location = {attr}, component = {c}) {iq}in float {name}_{swzMask}{suffix};");
                }
            }
            else
            {
                bool passthrough = (context.Config.PassthroughAttributes & (1 << attr)) != 0;
                string pass = passthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough() ? "passthrough, " : string.Empty;
                string type;

                if (context.Config.Stage == ShaderStage.Vertex)
                {
                    type = context.Config.GpuAccessor.QueryAttributeType(attr).ToVec4Type();
                }
                else
                {
                    type = AttributeType.Float.ToVec4Type();
                }

                context.AppendLine($"layout ({pass}location = {attr}) {iq}in {type} {name}{suffix};");
            }
        }

        /// <summary>
        /// Declares a single per-patch input attribute as a vec4.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="attr">Per-patch attribute index</param>
        private static void DeclareInputAttributePerPatch(CodeGenContext context, int attr)
        {
            int location = context.Config.GetPerPatchAttributeLocation(attr);
            string name = $"{DefaultNames.PerPatchAttributePrefix}{attr}";

            context.AppendLine($"layout (location = {location}) patch in vec4 {name};");
        }

        /// <summary>
        /// Declares the output attributes: a single vec4 array when output attribute indexing is used,
        /// otherwise one declaration per bit set in the used output mask. On the fragment stage with
        /// dual source blending enabled, the two lowest used outputs are declared as a dual source
        /// pair when they are adjacent.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="info">Structured program information (not read by this method)</param>
        private static void DeclareOutputAttributes(CodeGenContext context, StructuredProgramInfo info)
        {
            if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing))
            {
                context.AppendLine($"layout (location = 0) out vec4 {DefaultNames.OAttributePrefix}[{Constants.MaxAttributes}];");
            }
            else
            {
                int usedAttributes = context.Config.UsedOutputAttributes;

                if (context.Config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryDualSourceBlendEnable())
                {
                    int firstOutput = BitOperations.TrailingZeroCount(usedAttributes);
                    int mask = 3 << firstOutput;

                    // Only applies when both the first used output and the one right after it are used.
                    if ((usedAttributes & mask) == mask)
                    {
                        usedAttributes &= ~mask;
                        DeclareOutputDualSourceBlendAttribute(context, firstOutput);
                    }
                }

                while (usedAttributes != 0)
                {
                    int index = BitOperations.TrailingZeroCount(usedAttributes);
                    DeclareOutputAttribute(context, index);
                    usedAttributes &= ~(1 << index);
                }
            }
        }

        /// <summary>
        /// Declares a single output attribute. When transform feedback is enabled on the last
        /// vertex pipeline stage, the attribute is split into a vector plus individual float
        /// components, each with its own transform feedback qualifiers when valid.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="attr">Attribute index, also used as the layout location</param>
        private static void DeclareOutputAttribute(CodeGenContext context, int attr)
        {
            string suffix = IsArrayAttributeGlsl(context.Config.Stage, isOutAttr: true) ? "[]" : string.Empty;
            string name = $"{DefaultNames.OAttributePrefix}{attr}{suffix}";

            if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline)
            {
                int components = context.Config.GetTransformFeedbackOutputComponents(attr, 0);

                if (components > 1)
                {
                    string type = components switch
                    {
                        2 => "vec2",
                        3 => "vec3",
                        4 => "vec4",
                        _ => "float"
                    };

                    string xfb = string.Empty;

                    var tfOutput = context.Config.GetTransformFeedbackOutput(attr, 0);
                    if (tfOutput.Valid)
                    {
                        xfb = $", xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}";
                    }

                    context.AppendLine($"layout (location = {attr}{xfb}) out {type} {name};");
                }

                // Components not covered by the vector above are declared as separate floats.
                for (int c = components > 1 ? components : 0; c < 4; c++)
                {
                    char swzMask = "xyzw"[c];

                    string xfb = string.Empty;

                    var tfOutput = context.Config.GetTransformFeedbackOutput(attr, c);
                    if (tfOutput.Valid)
                    {
                        xfb = $", xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}";
                    }

                    context.AppendLine($"layout (location = {attr}, component = {c}{xfb}) out float {name}_{swzMask};");
                }
            }
            else
            {
                string type = context.Config.Stage != ShaderStage.Fragment ? "vec4" :
                    context.Config.GpuAccessor.QueryFragmentOutputType(attr) switch
                    {
                        AttributeType.Sint => "ivec4",
                        AttributeType.Uint => "uvec4",
                        _ => "vec4"
                    };

                if (context.Config.GpuAccessor.QueryHostReducedPrecision() && context.Config.Stage == ShaderStage.Vertex && attr == 0)
                {
                    context.AppendLine($"layout (location = {attr}) invariant out {type} {name};");
                }
                else
                {
                    context.AppendLine($"layout (location = {attr}) out {type} {name};");
                }
            }
        }

        /// <summary>
        /// Declares two outputs sharing one location with index 0 and index 1.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="attr">Index of the first output; the second one is named after <paramref name="attr"/> + 1</param>
        private static void DeclareOutputDualSourceBlendAttribute(CodeGenContext context, int attr)
        {
            string name = $"{DefaultNames.OAttributePrefix}{attr}";
            string name2 = $"{DefaultNames.OAttributePrefix}{(attr + 1)}";

            context.AppendLine($"layout (location = {attr}, index = 0) out vec4 {name};");
            context.AppendLine($"layout (location = {attr}, index = 1) out vec4 {name2};");
        }

        /// <summary>
        /// Checks whether attributes of a stage are declared with an array suffix.
        /// </summary>
        /// <param name="stage">Shader stage</param>
        /// <param name="isOutAttr">True for output attributes, false for input attributes</param>
        /// <returns>
        /// For outputs, true on tessellation control only. For inputs, true on tessellation control,
        /// tessellation evaluation and geometry.
        /// </returns>
        private static bool IsArrayAttributeGlsl(ShaderStage stage, bool isOutAttr)
        {
            if (isOutAttr)
            {
                return stage == ShaderStage.TessellationControl;
            }
            else
            {
                return stage == ShaderStage.TessellationControl ||
                       stage == ShaderStage.TessellationEvaluation ||
                       stage == ShaderStage.Geometry;
            }
        }

        /// <summary>
        /// Declares the per-patch output attributes in ascending order.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="attrs">Per-patch attribute indices</param>
        private static void DeclareUsedOutputAttributesPerPatch(CodeGenContext context, HashSet<int> attrs)
        {
            foreach (int attr in attrs.Order())
            {
                DeclareOutputAttributePerPatch(context, attr);
            }
        }

        /// <summary>
        /// Declares a single per-patch output attribute as a vec4.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="attr">Per-patch attribute index</param>
        private static void DeclareOutputAttributePerPatch(CodeGenContext context, int attr)
        {
            int location = context.Config.GetPerPatchAttributeLocation(attr);
            string name = $"{DefaultNames.PerPatchAttributePrefix}{attr}";

            context.AppendLine($"layout (location = {location}) patch out vec4 {name};");
        }

        /// <summary>
        /// Declares the support uniform block at binding 0. Nothing is written when the block is
        /// not needed by the stage and there are no scale elements.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="stage">Shader stage, which selects the members placed before the render scale array</param>
        /// <param name="scaleElements">Number of scale elements used; only compared against zero here</param>
        private static void DeclareSupportUniformBlock(CodeGenContext context, ShaderStage stage, int scaleElements)
        {
            bool needsSupportBlock = stage == ShaderStage.Fragment ||
                (context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable());

            if (!needsSupportBlock && scaleElements == 0)
            {
                return;
            }

            context.AppendLine($"layout (binding = 0, std140) uniform {DefaultNames.SupportBlockName}");
            context.EnterScope();

            switch (stage)
            {
                case ShaderStage.Fragment:
                case ShaderStage.Vertex:
                    context.AppendLine($"uint {DefaultNames.SupportBlockAlphaTestName};");
                    context.AppendLine($"bool {DefaultNames.SupportBlockIsBgraName}[{SupportBuffer.FragmentIsBgraCount}];");
                    context.AppendLine($"vec4 {DefaultNames.SupportBlockViewportInverse};");
                    context.AppendLine($"int {DefaultNames.SupportBlockFragmentScaleCount};");
                    break;
                case ShaderStage.Compute:
                    // Placeholder array occupying the space before the compute render scale offset.
                    context.AppendLine($"uint s_reserved[{SupportBuffer.ComputeRenderScaleOffset / SupportBuffer.FieldSize}];");
                    break;
            }

            context.AppendLine($"float {DefaultNames.SupportBlockRenderScaleName}[{SupportBuffer.RenderScaleMaxCount}];");

            context.LeaveScope(";");
            context.AppendLine();
        }

        /// <summary>
        /// Reads an embedded helper function, substitutes its placeholders and appends it,
        /// followed by an empty line.
        /// </summary>
        /// <param name="context">Code generation context that receives the output</param>
        /// <param name="filename">Embedded resource name of the helper function source</param>
        private static void AppendHelperFunction(CodeGenContext context, string filename)
        {
            string code = EmbeddedResources.ReadAllText(filename);

            code = code.Replace("\t", CodeGenContext.Tab);
            code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName);
            code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix);

            // The subgroup built-ins must match the extension enabled at the top of the shader.
            if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
            {
                code = code.Replace("$SUBGROUP_INVOCATION$", "gl_SubGroupInvocationARB");
                code = code.Replace("$SUBGROUP_BROADCAST$", "readInvocationARB");
            }
            else
            {
                code = code.Replace("$SUBGROUP_INVOCATION$", "gl_SubgroupInvocationID");
                code = code.Replace("$SUBGROUP_BROADCAST$", "subgroupBroadcast");
            }

            context.AppendLine(code);
            context.AppendLine();
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Identifier names, prefixes and suffixes used in the generated GLSL source.
    /// </summary>
    static class DefaultNames
    {
        // Locals, function arguments and the placeholder constant.
        public const string LocalNamePrefix = "temp";
        public const string ArgumentNamePrefix = "a";
        public const string UndefinedName = "undef";

        // Memory arrays.
        public const string LocalMemoryName = "local_mem";
        public const string SharedMemoryName = "shared_mem";

        // Textures and images.
        public const string SamplerNamePrefix = "tex";
        public const string ImageNamePrefix = "img";

        // Attributes.
        public const string IAttributePrefix = "in_attr";
        public const string OAttributePrefix = "out_attr";
        public const string PerPatchAttributePrefix = "patch_attr_";

        // Uniform and storage buffer blocks.
        public const string UniformNamePrefix = "c";
        public const string UniformNameSuffix = "data";
        public const string StorageNamePrefix = "s";
        public const string DataName = "data";
        public const string BlockSuffix = "block";

        // Support uniform block and its members.
        public const string SupportBlockName = "support_block";
        public const string SupportBlockAlphaTestName = "s_alpha_test";
        public const string SupportBlockIsBgraName = "s_is_bgra";
        public const string SupportBlockViewportInverse = "s_viewport_inverse";
        public const string SupportBlockFragmentScaleCount = "s_frag_scale_count";
        public const string SupportBlockRenderScaleName = "s_render_scale";
    }
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.CodeGen.Glsl.TypeConversion;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Turns a structured shader program into GLSL source text.
    /// </summary>
    static class GlslGenerator
    {
        private const string MainFunctionName = "main";

        /// <summary>
        /// Generates the GLSL code for a whole program.
        /// </summary>
        /// <param name="info">Structured program to print</param>
        /// <param name="config">Shader configuration handed to the code generation context</param>
        /// <returns>The generated code</returns>
        public static string Generate(StructuredProgramInfo info, ShaderConfig config)
        {
            CodeGenContext context = new CodeGenContext(info, config);

            Declarations.Declare(context, info);

            if (info.Functions.Count > 0)
            {
                // Every function other than the entry point (index 0) gets a prototype first...
                for (int index = 1; index < info.Functions.Count; index++)
                {
                    string signature = GetFunctionSignature(context, info.Functions[index]);

                    context.AppendLine($"{signature};");
                }

                context.AppendLine();

                // ...followed by its definition.
                for (int index = 1; index < info.Functions.Count; index++)
                {
                    PrintFunction(context, info, info.Functions[index]);

                    context.AppendLine();
                }
            }

            // The entry point is printed last, under the fixed GLSL entry point name.
            PrintFunction(context, info, info.Functions[0], MainFunctionName);

            return context.GetCode();
        }

        /// <summary>
        /// Prints one function: signature, local declarations and body.
        /// </summary>
        private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null)
        {
            context.CurrentFunction = function;

            string signature = GetFunctionSignature(context, function, funcName);

            context.AppendLine(signature);
            context.EnterScope();

            Declarations.DeclareLocals(context, function);

            PrintBlock(context, function.MainBlock);

            context.LeaveScope();
        }

        /// <summary>
        /// Builds the signature of a function, without a trailing semicolon.
        /// Input arguments come first, followed by the output arguments marked with "out".
        /// </summary>
        private static string GetFunctionSignature(CodeGenContext context, StructuredFunction function, string funcName = null)
        {
            int inputCount = function.InArguments.Length;
            int outputCount = function.OutArguments.Length;

            string[] parameters = new string[inputCount + outputCount];

            for (int input = 0; input < inputCount; input++)
            {
                string typeName = Declarations.GetVarTypeName(context, function.InArguments[input]);

                parameters[input] = $"{typeName} {OperandManager.GetArgumentName(input)}";
            }

            for (int output = 0; output < outputCount; output++)
            {
                // Output arguments are numbered after the input ones.
                int slot = inputCount + output;

                string typeName = Declarations.GetVarTypeName(context, function.OutArguments[output]);

                parameters[slot] = $"out {typeName} {OperandManager.GetArgumentName(slot)}";
            }

            string returnTypeName = Declarations.GetVarTypeName(context, function.ReturnType);
            string name = funcName ?? function.Name;

            return $"{returnTypeName} {name}({string.Join(", ", parameters)})";
        }

        /// <summary>
        /// Prints all the nodes of a block, including its nested blocks.
        /// </summary>
        private static void PrintBlock(CodeGenContext context, AstBlock block)
        {
            AstBlockVisitor visitor = new AstBlockVisitor(block);

            visitor.BlockEntered += (sender, e) =>
            {
                string header = e.Block.Type switch
                {
                    AstBlockType.DoWhile => "do",
                    AstBlockType.Else => "else",
                    AstBlockType.ElseIf => $"else if ({GetCondExpr(context, e.Block.Condition)})",
                    AstBlockType.If => $"if ({GetCondExpr(context, e.Block.Condition)})",
                    _ => throw new InvalidOperationException($"Found unexpected block type \"{e.Block.Type}\".")
                };

                context.AppendLine(header);
                context.EnterScope();
            };

            visitor.BlockLeft += (sender, e) =>
            {
                context.LeaveScope();

                // The condition of a do-while loop goes after the body.
                if (e.Block.Type == AstBlockType.DoWhile)
                {
                    context.AppendLine($"while ({GetCondExpr(context, e.Block.Condition)});");
                }
            };

            foreach (IAstNode node in visitor.Visit())
            {
                switch (node)
                {
                    case AstOperation operation:
                        string statement = InstGen.GetExpression(context, operation);

                        // A null expression means there is nothing to print for this operation.
                        if (statement != null)
                        {
                            context.AppendLine(statement + ";");
                        }
                        break;

                    case AstAssignment assignment:
                        AggregateType destinationType = OperandManager.GetNodeDestType(context, assignment.Destination);
                        AggregateType sourceType = OperandManager.GetNodeDestType(context, assignment.Source);

                        string target = InstGen.GetExpression(context, assignment.Destination);
                        string value = ReinterpretCast(context, assignment.Source, sourceType, destinationType);

                        context.AppendLine($"{target} = {value};");
                        break;

                    case AstComment comment:
                        context.AppendLine("// " + comment.Comment);
                        break;

                    default:
                        throw new InvalidOperationException($"Found unexpected node type \"{node?.GetType().Name ?? "null"}\".");
                }
            }
        }

        /// <summary>
        /// Gets the expression of a condition node, cast to bool.
        /// </summary>
        private static string GetCondExpr(CodeGenContext context, IAstNode cond)
        {
            AggregateType conditionType = OperandManager.GetNodeDestType(context, cond);

            return ReinterpretCast(context, cond, conditionType, AggregateType.Bool);
        }
    }
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl

// Signed atomic maximum built from a compare-and-swap loop.
// Returns the value that was stored before the update.
int Helper_AtomicMaxS32(int offset, int value)
{
    uint expected;
    uint desired;
    do
    {
        expected = $SHARED_MEM$[offset];
        desired = uint(max(int(expected), value));
    }
    while (atomicCompSwap($SHARED_MEM$[offset], expected, desired) != expected);
    return int(expected);
}

// Signed atomic minimum built from a compare-and-swap loop.
// Returns the value that was stored before the update.
int Helper_AtomicMinS32(int offset, int value)
{
    uint expected;
    uint desired;
    do
    {
        expected = $SHARED_MEM$[offset];
        desired = uint(min(int(expected), value));
    }
    while (atomicCompSwap($SHARED_MEM$[offset], expected, desired) != expected);
    return int(expected);
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl

// Signed atomic maximum on a storage buffer word, built from a compare-and-swap loop.
// Returns the value that was stored before the update.
int Helper_AtomicMaxS32(int index, int offset, int value)
{
    uint expected;
    uint desired;
    do
    {
        expected = $STORAGE_MEM$[index].data[offset];
        desired = uint(max(int(expected), value));
    }
    while (atomicCompSwap($STORAGE_MEM$[index].data[offset], expected, desired) != expected);
    return int(expected);
}

// Signed atomic minimum on a storage buffer word, built from a compare-and-swap loop.
// Returns the value that was stored before the update.
int Helper_AtomicMinS32(int index, int offset, int value)
{
    uint expected;
    uint desired;
    do
    {
        expected = $STORAGE_MEM$[index].data[offset];
        desired = uint(min(int(expected), value));
    }
    while (atomicCompSwap($STORAGE_MEM$[index].data[offset], expected, desired) != expected);
    return int(expected);
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Names of the GLSL helper functions that generated code can call.
    /// </summary>
    /// <remarks>
    /// The names are constants rather than mutable static fields, so they can't be
    /// reassigned at runtime. This also matches how <c>DefaultNames</c> declares its names.
    /// </remarks>
    static class HelperFunctionNames
    {
        public const string AtomicMaxS32 = "Helper_AtomicMaxS32";
        public const string AtomicMinS32 = "Helper_AtomicMinS32";

        public const string MultiplyHighS32 = "Helper_MultiplyHighS32";
        public const string MultiplyHighU32 = "Helper_MultiplyHighU32";

        public const string Shuffle = "Helper_Shuffle";
        public const string ShuffleDown = "Helper_ShuffleDown";
        public const string ShuffleUp = "Helper_ShuffleUp";
        public const string ShuffleXor = "Helper_ShuffleXor";
        public const string SwizzleAdd = "Helper_SwizzleAdd";

        public const string StoreShared16 = "Helper_StoreShared16";
        public const string StoreShared8 = "Helper_StoreShared8";
        public const string StoreStorage16 = "Helper_StoreStorage16";
        public const string StoreStorage8 = "Helper_StoreStorage8";
    }
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl

// Returns the most significant 32 bits of the signed 64-bit product of x and y.
int Helper_MultiplyHighS32(int x, int y)
{
    int high, low;
    imulExtended(x, y, high, low);
    return high;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl

// Returns the most significant 32 bits of the unsigned 64-bit product of x and y.
uint Helper_MultiplyHighU32(uint x, uint y)
{
    uint high, low;
    umulExtended(x, y, high, low);
    return high;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl

// Reads x from the invocation selected by index, within the segment described by mask.
// Bits 0-4 of mask hold the clamp value and bits 8-12 hold the segment mask.
// When the source invocation is out of range, valid is false and x itself is returned.
float Helper_Shuffle(float x, uint index, uint mask, out bool valid)
{
    uint clampBits = mask & 0x1fu;
    uint segmentMask = (mask >> 8) & 0x1fu;
    uint firstThreadId = $SUBGROUP_INVOCATION$ & segmentMask;
    uint lastThreadId = firstThreadId | (clampBits & ~segmentMask);
    uint sourceThreadId = (index & ~segmentMask) | firstThreadId;
    valid = sourceThreadId <= lastThreadId;
    float fetched = $SUBGROUP_BROADCAST$(x, sourceThreadId);
    if (valid)
    {
        return fetched;
    }
    return x;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl

// Reads x from the invocation that is index positions after the current one.
// Bits 0-4 of mask hold the clamp value and bits 8-12 hold the segment mask.
// When the source invocation is out of range, valid is false and x itself is returned.
float Helper_ShuffleDown(float x, uint index, uint mask, out bool valid)
{
    uint clampBits = mask & 0x1fu;
    uint segmentMask = (mask >> 8) & 0x1fu;
    uint firstThreadId = $SUBGROUP_INVOCATION$ & segmentMask;
    uint lastThreadId = firstThreadId | (clampBits & ~segmentMask);
    uint sourceThreadId = $SUBGROUP_INVOCATION$ + index;
    valid = sourceThreadId <= lastThreadId;
    float fetched = $SUBGROUP_BROADCAST$(x, sourceThreadId);
    if (valid)
    {
        return fetched;
    }
    return x;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl

// Reads x from the invocation that is index positions before the current one.
// Bits 8-12 of mask hold the segment mask.
// When the source invocation is out of range, valid is false and x itself is returned.
float Helper_ShuffleUp(float x, uint index, uint mask, out bool valid)
{
    uint segmentMask = (mask >> 8) & 0x1fu;
    uint firstThreadId = $SUBGROUP_INVOCATION$ & segmentMask;
    uint sourceThreadId = $SUBGROUP_INVOCATION$ - index;
    // Compared as signed values, so a subtraction that wrapped around counts as out of range.
    valid = int(sourceThreadId) >= int(firstThreadId);
    float fetched = $SUBGROUP_BROADCAST$(x, sourceThreadId);
    if (valid)
    {
        return fetched;
    }
    return x;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl

// Reads x from the invocation whose id is the current id XOR index.
// Bits 0-4 of mask hold the clamp value and bits 8-12 hold the segment mask.
// When the source invocation is out of range, valid is false and x itself is returned.
float Helper_ShuffleXor(float x, uint index, uint mask, out bool valid)
{
    uint clampBits = mask & 0x1fu;
    uint segmentMask = (mask >> 8) & 0x1fu;
    uint firstThreadId = $SUBGROUP_INVOCATION$ & segmentMask;
    uint lastThreadId = firstThreadId | (clampBits & ~segmentMask);
    uint sourceThreadId = $SUBGROUP_INVOCATION$ ^ index;
    valid = sourceThreadId <= lastThreadId;
    float fetched = $SUBGROUP_BROADCAST$(x, sourceThreadId);
    if (valid)
    {
        return fetched;
    }
    return x;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl

// Stores the low 16 bits of value at a byte offset in shared memory.
// The containing 32-bit word is updated with a compare-and-swap loop.
void Helper_StoreShared16(int offset, uint value)
{
    int word = offset >> 2;
    int shift = (offset & 3) * 8;
    uint current;
    uint updated;
    do
    {
        current = $SHARED_MEM$[word];
        updated = bitfieldInsert(current, value, shift, 16);
    }
    while (atomicCompSwap($SHARED_MEM$[word], current, updated) != current);
}

// Stores the low 8 bits of value at a byte offset in shared memory.
// The containing 32-bit word is updated with a compare-and-swap loop.
void Helper_StoreShared8(int offset, uint value)
{
    int word = offset >> 2;
    int shift = (offset & 3) * 8;
    uint current;
    uint updated;
    do
    {
        current = $SHARED_MEM$[word];
        updated = bitfieldInsert(current, value, shift, 8);
    }
    while (atomicCompSwap($SHARED_MEM$[word], current, updated) != current);
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl

// Stores the low 16 bits of value at a byte offset in a storage buffer.
// The containing 32-bit word is updated with a compare-and-swap loop.
void Helper_StoreStorage16(int index, int offset, uint value)
{
    int word = offset >> 2;
    int shift = (offset & 3) * 8;
    uint current;
    uint updated;
    do
    {
        current = $STORAGE_MEM$[index].data[word];
        updated = bitfieldInsert(current, value, shift, 16);
    }
    while (atomicCompSwap($STORAGE_MEM$[index].data[word], current, updated) != current);
}

// Stores the low 8 bits of value at a byte offset in a storage buffer.
// The containing 32-bit word is updated with a compare-and-swap loop.
void Helper_StoreStorage8(int index, int offset, uint value)
{
    int word = offset >> 2;
    int shift = (offset & 3) * 8;
    uint current;
    uint updated;
    do
    {
        current = $STORAGE_MEM$[index].data[word];
        updated = bitfieldInsert(current, value, shift, 8);
    }
    while (atomicCompSwap($STORAGE_MEM$[index].data[word], current, updated) != current);
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl

// Combines x and y using per-invocation factors.
// Each invocation picks a 2-bit selector from mask, based on the low 2 bits of its id,
// and the selector chooses the factor applied to x and to y.
float Helper_SwizzleAdd(float x, float y, int mask)
{
    vec4 xFactors = vec4(1.0, -1.0, 1.0, 0.0);
    vec4 yFactors = vec4(1.0, 1.0, -1.0, 1.0);
    int lane = int($SUBGROUP_INVOCATION$ & 3u);
    int selector = (mask >> (lane * 2)) & 3;
    return x * xFactors[selector] + y * yFactors[selector];
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl

// Multiplies texel coordinates by the render scale stored for the sampler.
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
    float scale = s_render_scale[samplerIndex];
    if (scale != 1.0)
    {
        return ivec2(vec2(inputVec) * scale);
    }
    return inputVec;
}

// Divides a texture size by the render scale stored for the sampler.
int Helper_TextureSizeUnscale(int size, int samplerIndex)
{
    float scale = s_render_scale[samplerIndex];
    if (scale != 1.0)
    {
        return int(float(size) / scale);
    }
    return size;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl

// Multiplies texel coordinates by the render scale stored for the sampler.
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
    float scale = s_render_scale[1 + samplerIndex];
    if (scale == 1.0)
    {
        return inputVec;
    }
    if (scale < 0.0)
    {
        // A negative scale means we should try to interpolate between texels,
        // using the screen position as the offset inside the scaled texel.
        return ivec2(vec2(inputVec) * (-scale) + mod(gl_FragCoord.xy, 0.0 - scale));
    }
    return ivec2(vec2(inputVec) * scale);
}

// Divides a texture size by the magnitude of the render scale stored for the sampler.
int Helper_TextureSizeUnscale(int size, int samplerIndex)
{
    float scale = abs(s_render_scale[1 + samplerIndex]);
    if (scale != 1.0)
    {
        return int(float(size) / scale);
    }
    return size;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_vp.glsl

// Multiplies texel coordinates by the magnitude of the render scale stored for the sampler.
// The scale is read past the first s_frag_scale_count + 1 entries of the scale array.
ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
{
    float scale = abs(s_render_scale[1 + samplerIndex + s_frag_scale_count]);
    if (scale != 1.0)
    {
        return ivec2(vec2(inputVec) * scale);
    }
    return inputVec;
}

// Divides a texture size by the magnitude of the render scale stored for the sampler.
int Helper_TextureSizeUnscale(int size, int samplerIndex)
{
    float scale = abs(s_render_scale[1 + samplerIndex + s_frag_scale_count]);
    if (scale != 1.0)
    {
        return int(float(size) / scale);
    }
    return size;
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenBallot;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenCall;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenFSI;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenMemory;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenPacking;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenVector;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Builds GLSL expressions from AST nodes.
    /// </summary>
    static class InstGen
    {
        /// <summary>
        /// Gets the GLSL expression for an operation or operand node.
        /// </summary>
        /// <exception cref="ArgumentException">The node is neither an operation nor an operand</exception>
        public static string GetExpression(CodeGenContext context, IAstNode node)
        {
            switch (node)
            {
                case AstOperation operation:
                    return GetExpression(context, operation);

                case AstOperand operand:
                    return context.OperandManager.GetExpression(context, operand);

                default:
                    throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\".");
            }
        }

        /// <summary>
        /// Gets the expression that negates the first source of an operation.
        /// </summary>
        public static string Negate(CodeGenContext context, AstOperation operation, InstInfo info)
        {
            IAstNode operand = operation.GetSource(0);

            AggregateType operandType = GetSrcVarType(operation.Inst, 0);

            string operandExpr = GetSoureExpr(context, operand, operandType);
            string zeroLiteral;

            if (operandType != AggregateType.FP64)
            {
                NumberFormatter.TryFormat(0, operandType, out zeroLiteral);
            }
            else
            {
                zeroLiteral = "0.0";
            }

            // Starting in the 496.13 NVIDIA driver, there's an issue with assigning variables to negated expressions.
            // (-expr) does not work, but (0.0 - expr) does. This should be removed once the issue is resolved.
            string enclosed = Enclose(operandExpr, operand, operation.Inst, info, false);

            return $"{zeroLiteral} - {enclosed}";
        }

        /// <summary>
        /// Gets the expression for an operation, dispatching on the kind of instruction.
        /// </summary>
        /// <exception cref="InvalidOperationException">No expression could be built for the instruction type</exception>
        private static string GetExpression(CodeGenContext context, AstOperation operation)
        {
            Instruction inst = operation.Inst;

            InstInfo info = GetInstructionInfo(inst);

            if ((info.Type & InstType.Call) != 0)
            {
                return GetCallExpression(context, operation, inst, info);
            }

            if ((info.Type & InstType.Op) != 0)
            {
                return GetOperatorExpression(context, operation, inst, info);
            }

            if ((info.Type & InstType.Special) != 0)
            {
                return GetSpecialExpression(context, operation, inst, info);
            }

            throw UnexpectedInstructionType(info);
        }

        /// <summary>
        /// Gets the expression for an instruction that is emitted as a function call.
        /// </summary>
        private static string GetCallExpression(CodeGenContext context, AstOperation operation, Instruction inst, InstInfo info)
        {
            bool isAtomic = (info.Type & InstType.Atomic) != 0;

            int argumentCount = (int)(info.Type & InstType.ArityMask);

            System.Text.StringBuilder arguments = new System.Text.StringBuilder();

            for (int index = 0; index < argumentCount; index++)
            {
                // For shared memory access, the second argument is unused and should be ignored.
                // It is there to make both storage and shared access have the same number of arguments.
                // For storage, both inputs are consumed when the argument index is 0, so we should skip it here.
                if (index == 1 && (isAtomic || operation.StorageKind == StorageKind.SharedMemory))
                {
                    continue;
                }

                if (index > 0)
                {
                    arguments.Append(", ");
                }

                if (index == 0 && isAtomic)
                {
                    // The first argument of an atomic call is the memory location itself.
                    switch (operation.StorageKind)
                    {
                        case StorageKind.SharedMemory:
                            arguments.Append(LoadShared(context, operation));
                            break;

                        case StorageKind.StorageBuffer:
                            arguments.Append(LoadStorage(context, operation));
                            break;

                        default:
                            throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
                    }
                }
                else
                {
                    AggregateType argumentType = GetSrcVarType(inst, index);

                    arguments.Append(GetSoureExpr(context, operation.GetSource(index), argumentType));
                }
            }

            return $"{info.OpName}({arguments})";
        }

        /// <summary>
        /// Gets the expression for an instruction that is emitted as an operator or keyword.
        /// </summary>
        private static string GetOperatorExpression(CodeGenContext context, AstOperation operation, Instruction inst, InstInfo info)
        {
            string op = info.OpName;

            // Return may optionally have a return value (and in this case it is unary).
            if (inst == Instruction.Return && operation.SourcesCount != 0)
            {
                string returnValue = GetSoureExpr(context, operation.GetSource(0), context.CurrentFunction.ReturnType);

                return $"{op} {returnValue}";
            }

            int arity = (int)(info.Type & InstType.ArityMask);

            string[] operands = new string[arity];

            for (int index = 0; index < arity; index++)
            {
                IAstNode source = operation.GetSource(index);

                string sourceExpr = GetSoureExpr(context, source, GetSrcVarType(inst, index));

                bool isLhs = arity == 2 && index == 0;

                operands[index] = Enclose(sourceExpr, source, inst, info, isLhs);
            }

            if (arity == 0)
            {
                return op;
            }

            if (arity == 1)
            {
                return op + operands[0];
            }

            if (arity == 2)
            {
                return $"{operands[0]} {op} {operands[1]}";
            }

            if (arity == 3)
            {
                // A ternary operator is stored as its two characters, one between each pair of operands.
                return $"{operands[0]} {op[0]} {operands[1]} {op[1]} {operands[2]}";
            }

            throw UnexpectedInstructionType(info);
        }

        /// <summary>
        /// Gets the expression for an instruction that has its own dedicated generator.
        /// </summary>
        private static string GetSpecialExpression(CodeGenContext context, AstOperation operation, Instruction inst, InstInfo info)
        {
            switch (inst & Instruction.Mask)
            {
                case Instruction.Ballot: return Ballot(context, operation);
                case Instruction.Call: return Call(context, operation);
                case Instruction.FSIBegin: return FSIBegin(context);
                case Instruction.FSIEnd: return FSIEnd(context);

                case Instruction.ImageLoad:
                case Instruction.ImageStore:
                case Instruction.ImageAtomic:
                    return ImageLoadOrStore(context, operation);

                case Instruction.Load: return Load(context, operation);
                case Instruction.LoadConstant: return LoadConstant(context, operation);
                case Instruction.LoadLocal: return LoadLocal(context, operation);
                case Instruction.LoadShared: return LoadShared(context, operation);
                case Instruction.LoadStorage: return LoadStorage(context, operation);
                case Instruction.Lod: return Lod(context, operation);
                case Instruction.Negate: return Negate(context, operation, info);
                case Instruction.PackDouble2x32: return PackDouble2x32(context, operation);
                case Instruction.PackHalf2x16: return PackHalf2x16(context, operation);
                case Instruction.Store: return Store(context, operation);
                case Instruction.StoreLocal: return StoreLocal(context, operation);
                case Instruction.StoreShared: return StoreShared(context, operation);
                case Instruction.StoreShared16: return StoreShared16(context, operation);
                case Instruction.StoreShared8: return StoreShared8(context, operation);
                case Instruction.StoreStorage: return StoreStorage(context, operation);
                case Instruction.StoreStorage16: return StoreStorage16(context, operation);
                case Instruction.StoreStorage8: return StoreStorage8(context, operation);
                case Instruction.TextureSample: return TextureSample(context, operation);
                case Instruction.TextureSize: return TextureSize(context, operation);
                case Instruction.UnpackDouble2x32: return UnpackDouble2x32(context, operation);
                case Instruction.UnpackHalf2x16: return UnpackHalf2x16(context, operation);
                case Instruction.VectorExtract: return VectorExtract(context, operation);

                default:
                    throw UnexpectedInstructionType(info);
            }
        }

        /// <summary>
        /// Creates the exception thrown when no expression can be built for an instruction.
        /// </summary>
        private static InvalidOperationException UnexpectedInstructionType(InstInfo info)
        {
            return new InvalidOperationException($"Unexpected instruction type \"{info.Type}\".");
        }
    }
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenBallot.cs
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    static class InstGenBallot
    {
        /// <summary>
        /// Gets the expression for a ballot operation.
        /// The ARB ballot function is used when the host reports shader ballot support,
        /// otherwise the subgroup ballot function is used. Only the low 32 bits of the result are taken.
        /// </summary>
        public static string Ballot(CodeGenContext context, AstOperation operation)
        {
            AggregateType predicateType = GetSrcVarType(operation.Inst, 0);

            string predicate = GetSoureExpr(context, operation.GetSource(0), predicateType);

            bool hostHasShaderBallot = context.Config.GpuAccessor.QueryHostSupportsShaderBallot();

            return hostHasShaderBallot
                ? $"unpackUint2x32(ballotARB({predicate})).x"
                : $"subgroupBallot({predicate}).x";
        }
    }
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenCall.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System.Diagnostics;

using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    static class InstGenCall
    {
        /// <summary>
        /// Gets the expression for a call to another function of the program.
        /// The first source of the operation is the function id, the remaining sources are the arguments.
        /// </summary>
        public static string Call(CodeGenContext context, AstOperation operation)
        {
            AstOperand functionId = (AstOperand)operation.GetSource(0);

            Debug.Assert(functionId.Type == OperandType.Constant);

            var callee = context.GetFunction(functionId.Value);

            int argumentCount = operation.SourcesCount - 1;

            string[] arguments = new string[argumentCount];

            for (int index = 0; index < argumentCount; index++)
            {
                // Skip over the function id, and cast each argument to the type the callee declares.
                IAstNode source = operation.GetSource(index + 1);

                arguments[index] = GetSoureExpr(context, source, callee.GetArgumentType(index));
            }

            string argumentList = string.Join(", ", arguments);

            return $"{callee.Name}({argumentList})";
        }
    }
}
// src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenFSI.cs
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    static class InstGenFSI
    {
        /// <summary>
        /// Gets the call that begins a fragment shader interlock region.
        /// </summary>
        /// <returns>The call expression, or null if the host supports neither of the two mechanisms</returns>
        public static string FSIBegin(CodeGenContext context)
        {
            var gpuAccessor = context.Config.GpuAccessor;

            // Interlock is checked first; Intel fragment shader ordering is only the fallback.
            if (!gpuAccessor.QueryHostSupportsFragmentShaderInterlock())
            {
                return gpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel()
                    ? "beginFragmentShaderOrderingINTEL()"
                    : null;
            }

            return "beginInvocationInterlockARB()";
        }

        /// <summary>
        /// Gets the call that ends a fragment shader interlock region.
        /// </summary>
        /// <returns>The call expression, or null if the host does not support fragment shader interlock</returns>
        public static string FSIEnd(CodeGenContext context)
        {
            bool hostHasInterlock = context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock();

            return hostHasInterlock ? "endInvocationInterlockARB()" : null;
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs new file mode 100644 index 00000000..00478f6a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -0,0 +1,231 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.TypeConversion; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGenHelper + { + private static readonly InstInfo[] _infoTable; + + static InstGenHelper() + { + _infoTable = new InstInfo[(int)Instruction.Count]; + + Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd"); + Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd"); + Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap"); + Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32); + Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax"); + Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32); + Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin"); + Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr"); + Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange"); + Add(Instruction.AtomicXor, InstType.AtomicBinary, "atomicXor"); + Add(Instruction.Absolute, InstType.CallUnary, "abs"); + Add(Instruction.Add, InstType.OpBinaryCom, "+", 2); + Add(Instruction.Ballot, InstType.Special); + Add(Instruction.Barrier, InstType.CallNullary, "barrier"); + Add(Instruction.BitCount, InstType.CallUnary, "bitCount"); + Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract"); + Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract"); + Add(Instruction.BitfieldInsert, 
InstType.CallQuaternary, "bitfieldInsert"); + Add(Instruction.BitfieldReverse, InstType.CallUnary, "bitfieldReverse"); + Add(Instruction.BitwiseAnd, InstType.OpBinaryCom, "&", 6); + Add(Instruction.BitwiseExclusiveOr, InstType.OpBinaryCom, "^", 7); + Add(Instruction.BitwiseNot, InstType.OpUnary, "~", 0); + Add(Instruction.BitwiseOr, InstType.OpBinaryCom, "|", 8); + Add(Instruction.Call, InstType.Special); + Add(Instruction.Ceiling, InstType.CallUnary, "ceil"); + Add(Instruction.Clamp, InstType.CallTernary, "clamp"); + Add(Instruction.ClampU32, InstType.CallTernary, "clamp"); + Add(Instruction.CompareEqual, InstType.OpBinaryCom, "==", 5); + Add(Instruction.CompareGreater, InstType.OpBinary, ">", 4); + Add(Instruction.CompareGreaterOrEqual, InstType.OpBinary, ">=", 4); + Add(Instruction.CompareGreaterOrEqualU32, InstType.OpBinary, ">=", 4); + Add(Instruction.CompareGreaterU32, InstType.OpBinary, ">", 4); + Add(Instruction.CompareLess, InstType.OpBinary, "<", 4); + Add(Instruction.CompareLessOrEqual, InstType.OpBinary, "<=", 4); + Add(Instruction.CompareLessOrEqualU32, InstType.OpBinary, "<=", 4); + Add(Instruction.CompareLessU32, InstType.OpBinary, "<", 4); + Add(Instruction.CompareNotEqual, InstType.OpBinaryCom, "!=", 5); + Add(Instruction.ConditionalSelect, InstType.OpTernary, "?:", 12); + Add(Instruction.ConvertFP32ToFP64, InstType.CallUnary, "double"); + Add(Instruction.ConvertFP64ToFP32, InstType.CallUnary, "float"); + Add(Instruction.ConvertFP32ToS32, InstType.CallUnary, "int"); + Add(Instruction.ConvertFP32ToU32, InstType.CallUnary, "uint"); + Add(Instruction.ConvertFP64ToS32, InstType.CallUnary, "int"); + Add(Instruction.ConvertFP64ToU32, InstType.CallUnary, "uint"); + Add(Instruction.ConvertS32ToFP32, InstType.CallUnary, "float"); + Add(Instruction.ConvertS32ToFP64, InstType.CallUnary, "double"); + Add(Instruction.ConvertU32ToFP32, InstType.CallUnary, "float"); + Add(Instruction.ConvertU32ToFP64, InstType.CallUnary, "double"); + Add(Instruction.Cosine, 
InstType.CallUnary, "cos"); + Add(Instruction.Ddx, InstType.CallUnary, "dFdx"); + Add(Instruction.Ddy, InstType.CallUnary, "dFdy"); + Add(Instruction.Discard, InstType.OpNullary, "discard"); + Add(Instruction.Divide, InstType.OpBinary, "/", 1); + Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex"); + Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive"); + Add(Instruction.ExponentB2, InstType.CallUnary, "exp2"); + Add(Instruction.FSIBegin, InstType.Special); + Add(Instruction.FSIEnd, InstType.Special); + Add(Instruction.FindLSB, InstType.CallUnary, "findLSB"); + Add(Instruction.FindMSBS32, InstType.CallUnary, "findMSB"); + Add(Instruction.FindMSBU32, InstType.CallUnary, "findMSB"); + Add(Instruction.Floor, InstType.CallUnary, "floor"); + Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma"); + Add(Instruction.GroupMemoryBarrier, InstType.CallNullary, "groupMemoryBarrier"); + Add(Instruction.ImageLoad, InstType.Special); + Add(Instruction.ImageStore, InstType.Special); + Add(Instruction.ImageAtomic, InstType.Special); + Add(Instruction.IsNan, InstType.CallUnary, "isnan"); + Add(Instruction.Load, InstType.Special); + Add(Instruction.LoadConstant, InstType.Special); + Add(Instruction.LoadLocal, InstType.Special); + Add(Instruction.LoadShared, InstType.Special); + Add(Instruction.LoadStorage, InstType.Special); + Add(Instruction.Lod, InstType.Special); + Add(Instruction.LogarithmB2, InstType.CallUnary, "log2"); + Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9); + Add(Instruction.LogicalExclusiveOr, InstType.OpBinaryCom, "^^", 10); + Add(Instruction.LogicalNot, InstType.OpUnary, "!", 0); + Add(Instruction.LogicalOr, InstType.OpBinaryCom, "||", 11); + Add(Instruction.LoopBreak, InstType.OpNullary, "break"); + Add(Instruction.LoopContinue, InstType.OpNullary, "continue"); + Add(Instruction.PackDouble2x32, InstType.Special); + Add(Instruction.PackHalf2x16, InstType.Special); + Add(Instruction.Maximum, InstType.CallBinary, 
"max"); + Add(Instruction.MaximumU32, InstType.CallBinary, "max"); + Add(Instruction.MemoryBarrier, InstType.CallNullary, "memoryBarrier"); + Add(Instruction.Minimum, InstType.CallBinary, "min"); + Add(Instruction.MinimumU32, InstType.CallBinary, "min"); + Add(Instruction.Multiply, InstType.OpBinaryCom, "*", 1); + Add(Instruction.MultiplyHighS32, InstType.CallBinary, HelperFunctionNames.MultiplyHighS32); + Add(Instruction.MultiplyHighU32, InstType.CallBinary, HelperFunctionNames.MultiplyHighU32); + Add(Instruction.Negate, InstType.Special); + Add(Instruction.ReciprocalSquareRoot, InstType.CallUnary, "inversesqrt"); + Add(Instruction.Return, InstType.OpNullary, "return"); + Add(Instruction.Round, InstType.CallUnary, "roundEven"); + Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3); + Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3); + Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3); + Add(Instruction.Shuffle, InstType.CallQuaternary, HelperFunctionNames.Shuffle); + Add(Instruction.ShuffleDown, InstType.CallQuaternary, HelperFunctionNames.ShuffleDown); + Add(Instruction.ShuffleUp, InstType.CallQuaternary, HelperFunctionNames.ShuffleUp); + Add(Instruction.ShuffleXor, InstType.CallQuaternary, HelperFunctionNames.ShuffleXor); + Add(Instruction.Sine, InstType.CallUnary, "sin"); + Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt"); + Add(Instruction.Store, InstType.Special); + Add(Instruction.StoreLocal, InstType.Special); + Add(Instruction.StoreShared, InstType.Special); + Add(Instruction.StoreShared16, InstType.Special); + Add(Instruction.StoreShared8, InstType.Special); + Add(Instruction.StoreStorage, InstType.Special); + Add(Instruction.StoreStorage16, InstType.Special); + Add(Instruction.StoreStorage8, InstType.Special); + Add(Instruction.Subtract, InstType.OpBinary, "-", 2); + Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd); + Add(Instruction.TextureSample, InstType.Special); + 
Add(Instruction.TextureSize, InstType.Special); + Add(Instruction.Truncate, InstType.CallUnary, "trunc"); + Add(Instruction.UnpackDouble2x32, InstType.Special); + Add(Instruction.UnpackHalf2x16, InstType.Special); + Add(Instruction.VectorExtract, InstType.Special); + Add(Instruction.VoteAll, InstType.CallUnary, "allInvocationsARB"); + Add(Instruction.VoteAllEqual, InstType.CallUnary, "allInvocationsEqualARB"); + Add(Instruction.VoteAny, InstType.CallUnary, "anyInvocationARB"); + } + + private static void Add(Instruction inst, InstType flags, string opName = null, int precedence = 0) + { + _infoTable[(int)inst] = new InstInfo(flags, opName, precedence); + } + + public static InstInfo GetInstructionInfo(Instruction inst) + { + return _infoTable[(int)(inst & Instruction.Mask)]; + } + + public static string GetSoureExpr(CodeGenContext context, IAstNode node, AggregateType dstType) + { + return ReinterpretCast(context, node, OperandManager.GetNodeDestType(context, node), dstType); + } + + public static string Enclose(string expr, IAstNode node, Instruction pInst, bool isLhs) + { + InstInfo pInfo = GetInstructionInfo(pInst); + + return Enclose(expr, node, pInst, pInfo, isLhs); + } + + public static string Enclose(string expr, IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs = false) + { + if (NeedsParenthesis(node, pInst, pInfo, isLhs)) + { + expr = "(" + expr + ")"; + } + + return expr; + } + + public static bool NeedsParenthesis(IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs) + { + // If the node isn't a operation, then it can only be a operand, + // and those never needs to be surrounded in parenthesis. + if (!(node is AstOperation operation)) + { + // This is sort of a special case, if this is a negative constant, + // and it is consumed by a unary operation, we need to put on the parenthesis, + // as in GLSL a sequence like --2 or ~-1 is not valid. 
+ if (IsNegativeConst(node) && pInfo.Type == InstType.OpUnary) + { + return true; + } + + return false; + } + + if ((pInfo.Type & (InstType.Call | InstType.Special)) != 0) + { + return false; + } + + InstInfo info = _infoTable[(int)(operation.Inst & Instruction.Mask)]; + + if ((info.Type & (InstType.Call | InstType.Special)) != 0) + { + return false; + } + + if (info.Precedence < pInfo.Precedence) + { + return false; + } + + if (info.Precedence == pInfo.Precedence && isLhs) + { + return false; + } + + if (pInst == operation.Inst && info.Type == InstType.OpBinaryCom) + { + return false; + } + + return true; + } + + private static bool IsNegativeConst(IAstNode node) + { + if (!(node is AstOperand operand)) + { + return false; + } + + return operand.Type == OperandType.Constant && operand.Value < 0; + } + } +}
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Text;

using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Builds the GLSL source text for memory, image and texture related instructions.
    /// </summary>
    static class InstGenMemory
    {
        /// <summary>
        /// Builds the GLSL call for an image load, image store or image atomic operation.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Operation to generate code for, must be a <see cref="AstTextureOperation"/></param>
        /// <returns>The GLSL expression, or a placeholder value/comment for bindless accesses</returns>
        public static string ImageLoadOrStore(CodeGenContext context, AstOperation operation)
        {
            AstTextureOperation texOp = (AstTextureOperation)operation;

            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

            // TODO: Bindless texture support. For now we just return 0/do nothing.
            if (isBindless)
            {
                switch (texOp.Inst)
                {
                    case Instruction.ImageStore:
                        return "// imageStore(bindless)";
                    case Instruction.ImageLoad:
                        AggregateType componentType = texOp.Format.GetComponentType();

                        NumberFormatter.TryFormat(0, componentType, out string imageConst);

                        AggregateType outputType = texOp.GetVectorType(componentType);

                        if ((outputType & AggregateType.ElementCountMask) != 0)
                        {
                            return $"{Declarations.GetVarTypeName(context, outputType, precise: false)}({imageConst})";
                        }

                        return imageConst;
                    default:
                        return NumberFormatter.FormatInt(0);
                }
            }

            bool isArray = (texOp.Type & SamplerType.Array) != 0;
            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

            var texCallBuilder = new StringBuilder();

            if (texOp.Inst == Instruction.ImageAtomic)
            {
                texCallBuilder.Append((texOp.Flags & TextureFlags.AtomicMask) switch
                {
                    TextureFlags.Add => "imageAtomicAdd",
                    TextureFlags.Minimum => "imageAtomicMin",
                    TextureFlags.Maximum => "imageAtomicMax",
                    TextureFlags.Increment => "imageAtomicAdd", // TODO: Clamp value.
                    TextureFlags.Decrement => "imageAtomicAdd", // TODO: Clamp value.
                    TextureFlags.BitwiseAnd => "imageAtomicAnd",
                    TextureFlags.BitwiseOr => "imageAtomicOr",
                    TextureFlags.BitwiseXor => "imageAtomicXor",
                    TextureFlags.Swap => "imageAtomicExchange",
                    TextureFlags.CAS => "imageAtomicCompSwap",
                    _ => "imageAtomicAdd",
                });
            }
            else
            {
                texCallBuilder.Append(texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore");
            }

            // isBindless is always false here because of the early return above,
            // the conditional is kept as a placeholder for the bindless TODO.
            int srcIndex = isBindless ? 1 : 0;

            // Consumes the next source operand, formatted as the requested type.
            string Src(AggregateType type)
            {
                return GetSoureExpr(context, texOp.GetSource(srcIndex++), type);
            }

            string indexExpr = null;

            if (isIndexed)
            {
                indexExpr = Src(AggregateType.S32);
            }

            string imageName = OperandManager.GetImageName(context.Config.Stage, texOp, indexExpr);

            texCallBuilder.Append('(');
            texCallBuilder.Append(imageName);

            int coordsCount = texOp.Type.GetDimensions();

            int pCount = coordsCount + (isArray ? 1 : 0);

            void Append(string str)
            {
                texCallBuilder.Append(", ");
                texCallBuilder.Append(str);
            }

            string ApplyScaling(string vector)
            {
                if (context.Config.Stage.SupportsRenderScale() &&
                    texOp.Inst == Instruction.ImageLoad &&
                    !isBindless &&
                    !isIndexed)
                {
                    // Image scales start after texture ones.
                    int scaleIndex = context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp);

                    if (pCount == 3 && isArray)
                    {
                        // The array index is not scaled, just x and y.
                        vector = $"ivec3(Helper_TexelFetchScale(({vector}).xy, {scaleIndex}), ({vector}).z)";
                    }
                    else if (pCount == 2 && !isArray)
                    {
                        vector = $"Helper_TexelFetchScale({vector}, {scaleIndex})";
                    }
                }

                return vector;
            }

            if (pCount > 1)
            {
                string[] elems = new string[pCount];

                for (int index = 0; index < pCount; index++)
                {
                    elems[index] = Src(AggregateType.S32);
                }

                Append(ApplyScaling($"ivec{pCount}({string.Join(", ", elems)})"));
            }
            else
            {
                Append(Src(AggregateType.S32));
            }

            if (texOp.Inst == Instruction.ImageStore)
            {
                AggregateType type = texOp.Format.GetComponentType();

                string[] cElems = new string[4];

                // The stored value is always a 4 component vector,
                // components without a source operand are filled with zero.
                for (int index = 0; index < 4; index++)
                {
                    if (srcIndex < texOp.SourcesCount)
                    {
                        cElems[index] = Src(type);
                    }
                    else
                    {
                        cElems[index] = type switch
                        {
                            AggregateType.S32 => NumberFormatter.FormatInt(0),
                            AggregateType.U32 => NumberFormatter.FormatUint(0),
                            _ => NumberFormatter.FormatFloat(0)
                        };
                    }
                }

                string prefix = type switch
                {
                    AggregateType.S32 => "i",
                    AggregateType.U32 => "u",
                    _ => string.Empty
                };

                Append($"{prefix}vec4({string.Join(", ", cElems)})");
            }

            if (texOp.Inst == Instruction.ImageAtomic)
            {
                AggregateType type = texOp.Format.GetComponentType();

                if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS)
                {
                    Append(Src(type)); // Compare value.
                }

                string value = (texOp.Flags & TextureFlags.AtomicMask) switch
                {
                    TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value
                    TextureFlags.Decrement => NumberFormatter.FormatInt(-1, type), // TODO: Clamp value
                    _ => Src(type)
                };

                Append(value);

                texCallBuilder.Append(')');

                // The result is wrapped on a int(...) conversion when the image component type is not S32.
                if (type != AggregateType.S32)
                {
                    texCallBuilder
                        .Insert(0, "int(")
                        .Append(')');
                }
            }
            else
            {
                texCallBuilder.Append(')');

                if (texOp.Inst == Instruction.ImageLoad)
                {
                    texCallBuilder.Append(GetMaskMultiDest(texOp.Index));
                }
            }

            return texCallBuilder.ToString();
        }

        /// <summary>
        /// Builds the GLSL expression for a load from an input or output variable.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Load operation</param>
        /// <returns>The GLSL expression that reads the variable</returns>
        public static string Load(CodeGenContext context, AstOperation operation)
        {
            return GenerateLoadOrStore(context, operation, isStore: false);
        }

        /// <summary>
        /// Builds the GLSL expression for a constant buffer load.
        /// The first source is the constant buffer slot and the second one is the offset.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Constant buffer load operation</param>
        /// <returns>The GLSL expression that reads the constant buffer</returns>
        public static string LoadConstant(CodeGenContext context, AstOperation operation)
        {
            IAstNode src1 = operation.GetSource(0);
            IAstNode src2 = operation.GetSource(1);

            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
            offsetExpr = Enclose(offsetExpr, src2, Instruction.ShiftRightS32, isLhs: true);

            var config = context.Config;
            bool indexElement = !config.GpuAccessor.QueryHostHasVectorIndexingBug();

            if (src1 is AstOperand operand && operand.Type == OperandType.Constant)
            {
                bool cbIndexable = config.UsedFeatures.HasFlag(Translation.FeatureFlags.CbIndexing);
                return OperandManager.GetConstantBufferName(operand.Value, offsetExpr, config.Stage, cbIndexable, indexElement);
            }
            else
            {
                string slotExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
                return OperandManager.GetConstantBufferName(slotExpr, offsetExpr, config.Stage, indexElement);
            }
        }

        /// <summary>
        /// Builds the GLSL expression for a load from the local memory array.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Local memory load operation</param>
        /// <returns>The GLSL array access expression</returns>
        public static string LoadLocal(CodeGenContext context, AstOperation operation)
        {
            return LoadLocalOrShared(context, operation, DefaultNames.LocalMemoryName);
        }

        /// <summary>
        /// Builds the GLSL expression for a load from the shared memory array.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Shared memory load operation</param>
        /// <returns>The GLSL array access expression</returns>
        public static string LoadShared(CodeGenContext context, AstOperation operation)
        {
            return LoadLocalOrShared(context, operation, DefaultNames.SharedMemoryName);
        }

        /// <summary>
        /// Builds an array element read where the index is the first source of the operation.
        /// </summary>
        private static string LoadLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName)
        {
            IAstNode src1 = operation.GetSource(0);

            string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));

            return $"{arrayName}[{offsetExpr}]";
        }

        /// <summary>
        /// Builds the GLSL expression for a storage buffer load.
        /// The first source is the storage buffer index and the second one is the offset.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Storage buffer load operation</param>
        /// <returns>The GLSL expression that reads the storage buffer</returns>
        public static string LoadStorage(CodeGenContext context, AstOperation operation)
        {
            IAstNode src1 = operation.GetSource(0);
            IAstNode src2 = operation.GetSource(1);

            string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));

            return GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
        }

        /// <summary>
        /// Builds the GLSL textureQueryLod call for a texture LOD query.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Operation to generate code for, must be a <see cref="AstTextureOperation"/></param>
        /// <returns>The GLSL expression, or a constant zero for bindless accesses</returns>
        public static string Lod(CodeGenContext context, AstOperation operation)
        {
            AstTextureOperation texOp = (AstTextureOperation)operation;

            int coordsCount = texOp.Type.GetDimensions();

            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

            // TODO: Bindless texture support. For now we just return 0.
            if (isBindless)
            {
                return NumberFormatter.FormatFloat(0);
            }

            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

            string indexExpr = null;

            if (isIndexed)
            {
                indexExpr = GetSoureExpr(context, texOp.GetSource(0), AggregateType.S32);
            }

            string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);

            // The coordinates come after the sampler index, when there is one.
            int coordsIndex = isBindless || isIndexed ? 1 : 0;

            string coordsExpr;

            if (coordsCount > 1)
            {
                string[] elems = new string[coordsCount];

                for (int index = 0; index < coordsCount; index++)
                {
                    elems[index] = GetSoureExpr(context, texOp.GetSource(coordsIndex + index), AggregateType.FP32);
                }

                coordsExpr = "vec" + coordsCount + "(" + string.Join(", ", elems) + ")";
            }
            else
            {
                coordsExpr = GetSoureExpr(context, texOp.GetSource(coordsIndex), AggregateType.FP32);
            }

            return $"textureQueryLod({samplerName}, {coordsExpr}){GetMask(texOp.Index)}";
        }

        /// <summary>
        /// Builds the GLSL assignment for a store to an input or output variable.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Store operation</param>
        /// <returns>The GLSL assignment expression</returns>
        public static string Store(CodeGenContext context, AstOperation operation)
        {
            return GenerateLoadOrStore(context, operation, isStore: true);
        }

        /// <summary>
        /// Builds the GLSL assignment for a store to the local memory array.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Local memory store operation</param>
        /// <returns>The GLSL assignment expression</returns>
        public static string StoreLocal(CodeGenContext context, AstOperation operation)
        {
            return StoreLocalOrShared(context, operation, DefaultNames.LocalMemoryName);
        }

        /// <summary>
        /// Builds the GLSL assignment for a store to the shared memory array.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Shared memory store operation</param>
        /// <returns>The GLSL assignment expression</returns>
        public static string StoreShared(CodeGenContext context, AstOperation operation)
        {
            return StoreLocalOrShared(context, operation, DefaultNames.SharedMemoryName);
        }

        /// <summary>
        /// Builds an array element write, the first source is the index and
        /// the second one is the value, which is reinterpreted as U32.
        /// </summary>
        private static string StoreLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName)
        {
            IAstNode src1 = operation.GetSource(0);
            IAstNode src2 = operation.GetSource(1);

            string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));

            AggregateType srcType = OperandManager.GetNodeDestType(context, src2);

            string src = TypeConversion.ReinterpretCast(context, src2, srcType, AggregateType.U32);

            return $"{arrayName}[{offsetExpr}] = {src}";
        }

        /// <summary>
        /// Builds the call to the 16-bit shared memory store helper function.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Shared memory store operation</param>
        /// <returns>The GLSL helper function call</returns>
        public static string StoreShared16(CodeGenContext context, AstOperation operation)
        {
            return StoreSharedWithHelper(context, operation, HelperFunctionNames.StoreShared16);
        }

        /// <summary>
        /// Builds the call to the 8-bit shared memory store helper function.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Shared memory store operation</param>
        /// <returns>The GLSL helper function call</returns>
        public static string StoreShared8(CodeGenContext context, AstOperation operation)
        {
            return StoreSharedWithHelper(context, operation, HelperFunctionNames.StoreShared8);
        }

        /// <summary>
        /// Builds a "helper(offset, value)" call for a shared memory store, with the value reinterpreted as U32.
        /// </summary>
        private static string StoreSharedWithHelper(CodeGenContext context, AstOperation operation, string helperName)
        {
            IAstNode src1 = operation.GetSource(0);
            IAstNode src2 = operation.GetSource(1);

            string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));

            AggregateType srcType = OperandManager.GetNodeDestType(context, src2);

            string src = TypeConversion.ReinterpretCast(context, src2, srcType, AggregateType.U32);

            return $"{helperName}({offsetExpr}, {src})";
        }

        /// <summary>
        /// Builds the GLSL assignment for a storage buffer store.
        /// The sources are the storage buffer index, the offset and the value to store.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Storage buffer store operation</param>
        /// <returns>The GLSL assignment expression</returns>
        public static string StoreStorage(CodeGenContext context, AstOperation operation)
        {
            IAstNode src1 = operation.GetSource(0);
            IAstNode src2 = operation.GetSource(1);
            IAstNode src3 = operation.GetSource(2);

            string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));

            AggregateType srcType = OperandManager.GetNodeDestType(context, src3);

            string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);

            string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);

            return $"{sb} = {src}";
        }

        /// <summary>
        /// Builds the call to the 16-bit storage buffer store helper function.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Storage buffer store operation</param>
        /// <returns>The GLSL helper function call</returns>
        public static string StoreStorage16(CodeGenContext context, AstOperation operation)
        {
            return StoreStorageWithHelper(context, operation, HelperFunctionNames.StoreStorage16);
        }

        /// <summary>
        /// Builds the call to the 8-bit storage buffer store helper function.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Storage buffer store operation</param>
        /// <returns>The GLSL helper function call</returns>
        public static string StoreStorage8(CodeGenContext context, AstOperation operation)
        {
            return StoreStorageWithHelper(context, operation, HelperFunctionNames.StoreStorage8);
        }

        /// <summary>
        /// Builds a "helper(index, offset, value)" call for a storage buffer store, with the value reinterpreted as U32.
        /// </summary>
        private static string StoreStorageWithHelper(CodeGenContext context, AstOperation operation, string helperName)
        {
            IAstNode src1 = operation.GetSource(0);
            IAstNode src2 = operation.GetSource(1);
            IAstNode src3 = operation.GetSource(2);

            string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
            string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));

            AggregateType srcType = OperandManager.GetNodeDestType(context, src3);

            string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);

            // The helper function receives the index and offset directly,
            // so there's no need to build the storage buffer accessor here.
            return $"{helperName}({indexExpr}, {offsetExpr}, {src})";
        }

        /// <summary>
        /// Builds the GLSL texture sampling call (texture, texelFetch, textureGather and variants).
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Operation to generate code for, must be a <see cref="AstTextureOperation"/></param>
        /// <returns>The GLSL expression, or a constant zero for bindless accesses</returns>
        public static string TextureSample(CodeGenContext context, AstOperation operation)
        {
            AstTextureOperation texOp = (AstTextureOperation)operation;

            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
            bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
            bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
            bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
            bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
            bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
            bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
            bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;

            bool isArray = (texOp.Type & SamplerType.Array) != 0;
            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
            bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
            bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;

            bool colorIsVector = isGather || !isShadow;

            SamplerType type = texOp.Type & SamplerType.Mask;

            bool is2D = type == SamplerType.Texture2D;
            bool isCube = type == SamplerType.TextureCube;

            // 2D Array and Cube shadow samplers with LOD level or bias requires an extension.
            // If the extension is not supported, just remove the LOD parameter.
            if (isArray && isShadow && (is2D || isCube) && !context.Config.GpuAccessor.QueryHostSupportsTextureShadowLod())
            {
                hasLodBias = false;
                hasLodLevel = false;
            }

            // Cube shadow samplers with LOD level requires an extension.
            // If the extension is not supported, just remove the LOD level parameter.
            if (isShadow && isCube && !context.Config.GpuAccessor.QueryHostSupportsTextureShadowLod())
            {
                hasLodLevel = false;
            }

            // TODO: Bindless texture support. For now we just return 0.
            if (isBindless)
            {
                string scalarValue = NumberFormatter.FormatFloat(0);

                if (colorIsVector)
                {
                    AggregateType outputType = texOp.GetVectorType(AggregateType.FP32);

                    if ((outputType & AggregateType.ElementCountMask) != 0)
                    {
                        return $"{Declarations.GetVarTypeName(context, outputType, precise: false)}({scalarValue})";
                    }
                }

                return scalarValue;
            }

            string texCall = intCoords ? "texelFetch" : "texture";

            if (isGather)
            {
                texCall += "Gather";
            }
            else if (hasDerivatives)
            {
                texCall += "Grad";
            }
            else if (hasLodLevel && !intCoords)
            {
                texCall += "Lod";
            }

            if (hasOffset)
            {
                texCall += "Offset";
            }
            else if (hasOffsets)
            {
                texCall += "Offsets";
            }

            // isBindless is always false here because of the early return above,
            // the conditional is kept as a placeholder for the bindless TODO.
            int srcIndex = isBindless ? 1 : 0;

            // Consumes the next source operand, formatted as the requested type.
            string Src(AggregateType type)
            {
                return GetSoureExpr(context, texOp.GetSource(srcIndex++), type);
            }

            string indexExpr = null;

            if (isIndexed)
            {
                indexExpr = Src(AggregateType.S32);
            }

            string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);

            texCall += "(" + samplerName;

            int coordsCount = texOp.Type.GetDimensions();

            int pCount = coordsCount;

            int arrayIndexElem = -1;

            if (isArray)
            {
                arrayIndexElem = pCount++;
            }

            // The sampler 1D shadow overload expects a
            // dummy value on the middle of the vector, who knows why...
            bool hasDummy1DShadowElem = texOp.Type == (SamplerType.Texture1D | SamplerType.Shadow);

            if (hasDummy1DShadowElem)
            {
                pCount++;
            }

            if (isShadow && !isGather)
            {
                pCount++;
            }

            // On textureGather*, the comparison value is
            // always specified as an extra argument.
            bool hasExtraCompareArg = isShadow && isGather;

            // A vector can have at most 4 elements, so the 5th element
            // (the comparison value) is passed as an extra argument instead.
            if (pCount == 5)
            {
                pCount = 4;

                hasExtraCompareArg = true;
            }

            void Append(string str)
            {
                texCall += ", " + str;
            }

            AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32;

            string AssemblePVector(int count)
            {
                if (count > 1)
                {
                    string[] elems = new string[count];

                    for (int index = 0; index < count; index++)
                    {
                        if (arrayIndexElem == index)
                        {
                            elems[index] = Src(AggregateType.S32);

                            if (!intCoords)
                            {
                                elems[index] = "float(" + elems[index] + ")";
                            }
                        }
                        else if (index == 1 && hasDummy1DShadowElem)
                        {
                            elems[index] = NumberFormatter.FormatFloat(0);
                        }
                        else
                        {
                            elems[index] = Src(coordType);
                        }
                    }

                    string prefix = intCoords ? "i" : string.Empty;

                    return prefix + "vec" + count + "(" + string.Join(", ", elems) + ")";
                }
                else
                {
                    return Src(coordType);
                }
            }

            string ApplyScaling(string vector)
            {
                if (intCoords)
                {
                    if (context.Config.Stage.SupportsRenderScale() &&
                        !isBindless &&
                        !isIndexed)
                    {
                        int index = context.Config.FindTextureDescriptorIndex(texOp);

                        if (pCount == 3 && isArray)
                        {
                            // The array index is not scaled, just x and y.
                            vector = "ivec3(Helper_TexelFetchScale((" + vector + ").xy, " + index + "), (" + vector + ").z)";
                        }
                        else if (pCount == 2 && !isArray)
                        {
                            vector = "Helper_TexelFetchScale(" + vector + ", " + index + ")";
                        }
                    }
                }

                return vector;
            }

            string ApplyBias(string vector)
            {
                int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();
                if (isGather && gatherBiasPrecision != 0)
                {
                    // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels.
                    // Offset by the gather precision divided by 2 to correct for rounding.

                    if (pCount == 1)
                    {
                        vector = $"{vector} + (1.0 / (float(textureSize({samplerName}, 0)) * float({1 << (gatherBiasPrecision + 1)})))";
                    }
                    else
                    {
                        // NOTE(review): Substring throws if pCount is 4 here, as the swizzle string only has 3 characters.
                        // Confirm whether a gather with a 4 elements coordinate vector can reach this path.
                        vector = $"{vector} + (1.0 / (vec{pCount}(textureSize({samplerName}, 0).{"xyz".Substring(0, pCount)}) * float({1 << (gatherBiasPrecision + 1)})))";
                    }
                }

                return vector;
            }

            Append(ApplyBias(ApplyScaling(AssemblePVector(pCount))));

            string AssembleDerivativesVector(int count)
            {
                if (count > 1)
                {
                    string[] elems = new string[count];

                    for (int index = 0; index < count; index++)
                    {
                        elems[index] = Src(AggregateType.FP32);
                    }

                    return "vec" + count + "(" + string.Join(", ", elems) + ")";
                }
                else
                {
                    return Src(AggregateType.FP32);
                }
            }

            if (hasExtraCompareArg)
            {
                Append(Src(AggregateType.FP32));
            }

            if (hasDerivatives)
            {
                Append(AssembleDerivativesVector(coordsCount)); // dPdx
                Append(AssembleDerivativesVector(coordsCount)); // dPdy
            }

            if (isMultisample)
            {
                Append(Src(AggregateType.S32));
            }
            else if (hasLodLevel)
            {
                Append(Src(coordType));
            }

            string AssembleOffsetVector(int count)
            {
                if (count > 1)
                {
                    string[] elems = new string[count];

                    for (int index = 0; index < count; index++)
                    {
                        elems[index] = Src(AggregateType.S32);
                    }

                    return "ivec" + count + "(" + string.Join(", ", elems) + ")";
                }
                else
                {
                    return Src(AggregateType.S32);
                }
            }

            if (hasOffset)
            {
                Append(AssembleOffsetVector(coordsCount));
            }
            else if (hasOffsets)
            {
                texCall += $", ivec{coordsCount}[4](";

                texCall += AssembleOffsetVector(coordsCount) + ", ";
                texCall += AssembleOffsetVector(coordsCount) + ", ";
                texCall += AssembleOffsetVector(coordsCount) + ", ";
                texCall += AssembleOffsetVector(coordsCount) + ")";
            }

            if (hasLodBias)
            {
                Append(Src(AggregateType.FP32));
            }

            // textureGather* optional extra component index,
            // not needed for shadow samplers.
            if (isGather && !isShadow)
            {
                Append(Src(AggregateType.S32));
            }

            texCall += ")" + (colorIsVector ? GetMaskMultiDest(texOp.Index) : "");

            return texCall;
        }

        /// <summary>
        /// Builds the GLSL textureSize or textureQueryLevels call for a texture size query.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Operation to generate code for, must be a <see cref="AstTextureOperation"/></param>
        /// <returns>The GLSL expression, or a constant zero for bindless accesses</returns>
        public static string TextureSize(CodeGenContext context, AstOperation operation)
        {
            AstTextureOperation texOp = (AstTextureOperation)operation;

            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

            // TODO: Bindless texture support. For now we just return 0.
            if (isBindless)
            {
                return NumberFormatter.FormatInt(0);
            }

            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

            string indexExpr = null;

            if (isIndexed)
            {
                indexExpr = GetSoureExpr(context, texOp.GetSource(0), AggregateType.S32);
            }

            string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);

            // Component index 3 is used to query the number of levels rather than a size.
            if (texOp.Index == 3)
            {
                return $"textureQueryLevels({samplerName})";
            }
            else
            {
                (TextureDescriptor descriptor, int descriptorIndex) = context.Config.FindTextureDescriptor(texOp);
                bool hasLod = !descriptor.Type.HasFlag(SamplerType.Multisample) && descriptor.Type != SamplerType.TextureBuffer;
                string texCall;

                if (hasLod)
                {
                    int lodSrcIndex = isBindless || isIndexed ? 1 : 0;
                    IAstNode lod = operation.GetSource(lodSrcIndex);
                    string lodExpr = GetSoureExpr(context, lod, GetSrcVarType(operation.Inst, lodSrcIndex));

                    texCall = $"textureSize({samplerName}, {lodExpr}){GetMask(texOp.Index)}";
                }
                else
                {
                    texCall = $"textureSize({samplerName}){GetMask(texOp.Index)}";
                }

                if (context.Config.Stage.SupportsRenderScale() &&
                    (texOp.Index < 2 || (texOp.Type & SamplerType.Mask) == SamplerType.Texture3D) &&
                    !isBindless &&
                    !isIndexed)
                {
                    texCall = $"Helper_TextureSizeUnscale({texCall}, {descriptorIndex})";
                }

                return texCall;
            }
        }

        /// <summary>
        /// Builds the GLSL variable access (and the assignment, for stores) for a load or store
        /// with input or output storage kind.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Load or store operation</param>
        /// <param name="isStore">True if the operation is a store, where the last source is the value to be stored</param>
        /// <returns>The GLSL variable access or assignment expression</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown if the storage kind is not supported, or if a operand that must be constant is not.
        /// </exception>
        private static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
        {
            StorageKind storageKind = operation.StorageKind;

            string varName;
            AggregateType varType;
            int srcIndex = 0;

            switch (storageKind)
            {
                case StorageKind.Input:
                case StorageKind.InputPerPatch:
                case StorageKind.Output:
                case StorageKind.OutputPerPatch:
                    if (!(operation.GetSource(srcIndex++) is AstOperand varId) || varId.Type != OperandType.Constant)
                    {
                        throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
                    }

                    IoVariable ioVariable = (IoVariable)varId.Value;
                    bool isOutput = storageKind.IsOutput();
                    bool isPerPatch = storageKind.IsPerPatch();
                    int location = -1;
                    int component = 0;

                    if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
                    {
                        if (!(operation.GetSource(srcIndex++) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant)
                        {
                            throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand.");
                        }

                        location = vecIndex.Value;

                        // The component operand is only consumed here if it is
                        // constant and the variable is declared per component.
                        if (operation.SourcesCount > srcIndex &&
                            operation.GetSource(srcIndex) is AstOperand elemIndex &&
                            elemIndex.Type == OperandType.Constant &&
                            context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput))
                        {
                            component = elemIndex.Value;
                            srcIndex++;
                        }
                    }

                    (varName, varType) = IoMap.GetGlslVariable(context.Config, ioVariable, location, component, isOutput, isPerPatch);

                    if (IoMap.IsPerVertexBuiltIn(context.Config.Stage, ioVariable, isOutput))
                    {
                        // Since those exist both as input and output on geometry and tessellation shaders,
                        // we need the gl_in and gl_out prefixes to disambiguate.

                        if (storageKind == StorageKind.Input)
                        {
                            string expr = GetSoureExpr(context, operation.GetSource(srcIndex++), AggregateType.S32);
                            varName = $"gl_in[{expr}].{varName}";
                        }
                        else if (storageKind == StorageKind.Output)
                        {
                            string expr = GetSoureExpr(context, operation.GetSource(srcIndex++), AggregateType.S32);
                            varName = $"gl_out[{expr}].{varName}";
                        }
                    }

                    int firstSrcIndex = srcIndex;

                    // For stores, the last source is the value to be stored rather than a index.
                    int inputsCount = isStore ? operation.SourcesCount - 1 : operation.SourcesCount;

                    for (; srcIndex < inputsCount; srcIndex++)
                    {
                        IAstNode src = operation.GetSource(srcIndex);

                        if ((varType & AggregateType.ElementCountMask) != 0 &&
                            srcIndex == inputsCount - 1 &&
                            src is AstOperand elementIndex &&
                            elementIndex.Type == OperandType.Constant)
                        {
                            varName += "." + "xyzw"[elementIndex.Value & 3];
                        }
                        else if (srcIndex == firstSrcIndex && context.Config.Stage == ShaderStage.TessellationControl && storageKind == StorageKind.Output)
                        {
                            // GLSL requires that for tessellation control shader outputs,
                            // that the index expression must be *exactly* "gl_InvocationID",
                            // otherwise the compilation fails.
                            // TODO: Get rid of this and use expression propagation to make sure we generate the correct code from IR.
                            varName += "[gl_InvocationID]";
                        }
                        else
                        {
                            varName += $"[{GetSoureExpr(context, src, AggregateType.S32)}]";
                        }
                    }
                    break;

                default:
                    throw new InvalidOperationException($"Invalid storage kind {storageKind}.");
            }

            if (isStore)
            {
                varType &= AggregateType.ElementTypeMask;
                varName = $"{varName} = {GetSoureExpr(context, operation.GetSource(srcIndex), varType)}";
            }

            return varName;
        }

        /// <summary>
        /// Builds the expression that accesses a element of a storage buffer.
        /// </summary>
        /// <param name="slotExpr">Expression with the storage buffer index</param>
        /// <param name="offsetExpr">Expression with the index of the element inside the buffer data array</param>
        /// <param name="stage">Shader stage, used for the storage buffer name prefix</param>
        /// <returns>The GLSL storage buffer access expression</returns>
        private static string GetStorageBufferAccessor(string slotExpr, string offsetExpr, ShaderStage stage)
        {
            string sbName = OperandManager.GetShaderStagePrefix(stage);

            sbName += "_" + DefaultNames.StorageNamePrefix;

            return $"{sbName}[{slotExpr}].{DefaultNames.DataName}[{offsetExpr}]";
        }

        /// <summary>
        /// Gets the swizzle that selects a single component (r, g, b or a) of a vector.
        /// </summary>
        /// <param name="index">Component index, from 0 to 3</param>
        /// <returns>The swizzle, including the leading dot</returns>
        private static string GetMask(int index)
        {
            return $".{"rgba".AsSpan(index, 1)}";
        }

        /// <summary>
        /// Gets the swizzle that selects the components (x, y, z and/or w) enabled on a bit mask.
        /// </summary>
        /// <param name="mask">Bit mask where bit N selects the component N, only the 4 lower bits are used</param>
        /// <returns>The swizzle, including the leading dot</returns>
        private static string GetMaskMultiDest(int mask)
        {
            string swizzle = ".";

            for (int i = 0; i < 4; i++)
            {
                if ((mask & (1 << i)) != 0)
                {
                    swizzle += "xyzw"[i];
                }
            }

            return swizzle;
        }
    }
}
using Ryujinx.Graphics.Shader.StructuredIr;
using System;

using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Builds the GLSL source text for the pack and unpack instructions.
    /// </summary>
    static class InstGenPacking
    {
        /// <summary>
        /// Builds a packDouble2x32 call, with the two sources of the operation grouped on a uvec2.
        /// </summary>
        public static string PackDouble2x32(CodeGenContext context, AstOperation operation)
        {
            return BuildPackCall(context, operation, "packDouble2x32", "uvec2");
        }

        /// <summary>
        /// Builds a packHalf2x16 call, with the two sources of the operation grouped on a vec2.
        /// </summary>
        public static string PackHalf2x16(CodeGenContext context, AstOperation operation)
        {
            return BuildPackCall(context, operation, "packHalf2x16", "vec2");
        }

        /// <summary>
        /// Builds a unpackDouble2x32 call, followed by the swizzle selected by the operation index.
        /// </summary>
        public static string UnpackDouble2x32(CodeGenContext context, AstOperation operation)
        {
            return BuildUnpackCall(context, operation, "unpackDouble2x32");
        }

        /// <summary>
        /// Builds a unpackHalf2x16 call, followed by the swizzle selected by the operation index.
        /// </summary>
        public static string UnpackHalf2x16(CodeGenContext context, AstOperation operation)
        {
            return BuildUnpackCall(context, operation, "unpackHalf2x16");
        }

        /// <summary>
        /// Builds "function(vectorType(first, second))" from the first two sources of the operation.
        /// </summary>
        private static string BuildPackCall(CodeGenContext context, AstOperation operation, string function, string vectorType)
        {
            IAstNode first = operation.GetSource(0);
            IAstNode second = operation.GetSource(1);

            string firstExpr = GetSoureExpr(context, first, GetSrcVarType(operation.Inst, 0));
            string secondExpr = GetSoureExpr(context, second, GetSrcVarType(operation.Inst, 1));

            return function + "(" + vectorType + "(" + firstExpr + ", " + secondExpr + "))";
        }

        /// <summary>
        /// Builds "function(source).component" from the first source and the index of the operation.
        /// </summary>
        private static string BuildUnpackCall(CodeGenContext context, AstOperation operation, string function)
        {
            IAstNode packed = operation.GetSource(0);

            string packedExpr = GetSoureExpr(context, packed, GetSrcVarType(operation.Inst, 0));

            return function + "(" + packedExpr + ")" + GetMask(operation.Index);
        }

        /// <summary>
        /// Gets the swizzle that selects the component (x or y) at the given index, including the leading dot.
        /// </summary>
        private static string GetMask(int index)
        {
            return "." + "xy".AsSpan(index, 1).ToString();
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenVector.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;

using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Builds the GLSL expression text for vector instructions.
    /// </summary>
    static class InstGenVector
    {
        /// <summary>
        /// Emits an element read from a vector: a swizzle (<c>v.x</c> .. <c>v.w</c>) when the index
        /// source is a constant operand, or a subscript (<c>v[expr]</c>) otherwise.
        /// </summary>
        public static string VectorExtract(CodeGenContext context, AstOperation operation)
        {
            IAstNode vectorNode = operation.GetSource(0);
            IAstNode indexNode = operation.GetSource(1);

            var vectorType = OperandManager.GetNodeDestType(context, vectorNode);
            string vectorExpr = GetSoureExpr(context, vectorNode, vectorType);

            if (indexNode is AstOperand { Type: OperandType.Constant } constantIndex)
            {
                // Constant index: pick the component letter directly.
                char component = "xyzw"[constantIndex.Value];

                return $"{vectorExpr}.{component}";
            }

            // Non-constant index: fall back to a subscript expression.
            string subscript = GetSoureExpr(context, indexNode, GetSrcVarType(operation.Inst, 1));

            return $"{vectorExpr}[{subscript}]";
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstInfo.cs
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Immutable triple of instruction type, operator/function name and precedence.
    /// </summary>
    readonly struct InstInfo
    {
        /// <summary>Instruction type flags passed to the constructor.</summary>
        public InstType Type { get; }

        /// <summary>Operator or function name passed to the constructor.</summary>
        public string OpName { get; }

        /// <summary>Precedence value passed to the constructor.</summary>
        public int Precedence { get; }

        public InstInfo(InstType type, string opName, int precedence)
        {
            (Type, OpName, Precedence) = (type, opName, precedence);
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs
using System;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Instruction kind. The low 8 bits (<see cref="ArityMask"/>) hold the operand count,
    /// the bits above are category flags.
    /// </summary>
    [Flags]
    enum InstType
    {
        // Operator forms: Op flag plus operand count.
        OpNullary   = Op,
        OpUnary     = Op | 1,
        OpBinary    = Op | 2,
        OpBinaryCom = OpBinary | Commutative,
        OpTernary   = Op | 3,

        // Call forms: Call flag plus operand count.
        CallNullary    = Call,
        CallUnary      = Call | 1,
        CallBinary     = Call | 2,
        CallTernary    = Call | 3,
        CallQuaternary = Call | 4,

        // The atomic instructions have one extra operand,
        // for the storage slot and offset pair.
        AtomicBinary  = Call | Atomic | 3,
        AtomicTernary = Call | Atomic | 4,

        // Category flag bits.
        Commutative = 0x0100,
        Op          = 0x0200,
        Call        = 0x0400,
        Atomic      = 0x0800,
        Special     = 0x1000,

        // Mask selecting the operand-count bits.
        ArityMask = 0xff
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Globalization;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
    /// <summary>
    /// Maps <see cref="IoVariable"/> values to GLSL variable names and their types.
    /// </summary>
    static class IoMap
    {
        /// <summary>
        /// Returns the GLSL variable name (or expression) and type for an I/O variable.
        /// Variables with no entry in the table yield <c>(null, AggregateType.Invalid)</c>.
        /// </summary>
        public static (string, AggregateType) GetGlslVariable(
            ShaderConfig config,
            IoVariable ioVariable,
            int location,
            int component,
            bool isOutput,
            bool isPerPatch)
        {
            // Shorthands for the types that appear repeatedly in the table below.
            const AggregateType Float = AggregateType.FP32;
            const AggregateType Int = AggregateType.S32;
            const AggregateType Boolean = AggregateType.Bool;
            const AggregateType Vec2F = AggregateType.Vector2 | AggregateType.FP32;
            const AggregateType Vec3F = AggregateType.Vector3 | AggregateType.FP32;
            const AggregateType Vec4F = AggregateType.Vector4 | AggregateType.FP32;
            const AggregateType Vec3U = AggregateType.Vector3 | AggregateType.U32;
            const AggregateType FloatArray = AggregateType.Array | AggregateType.FP32;
            const AggregateType IntArray = AggregateType.Array | AggregateType.S32;
            const AggregateType BoolArray = AggregateType.Array | AggregateType.Bool;
            const AggregateType Vec4FArray = AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;

            return ioVariable switch
            {
                IoVariable.BackColorDiffuse        => ("gl_BackColor", Vec4F), // Deprecated.
                IoVariable.BackColorSpecular       => ("gl_BackSecondaryColor", Vec4F), // Deprecated.
                IoVariable.BaseInstance            => ("gl_BaseInstanceARB", Int),
                IoVariable.BaseVertex              => ("gl_BaseVertexARB", Int),
                IoVariable.ClipDistance            => ("gl_ClipDistance", FloatArray),
                IoVariable.CtaId                   => ("gl_WorkGroupID", Vec3U),
                IoVariable.DrawIndex               => ("gl_DrawIDARB", Int),
                IoVariable.FogCoord                => ("gl_FogFragCoord", Float), // Deprecated.
                IoVariable.FragmentCoord           => ("gl_FragCoord", Vec4F),
                IoVariable.FragmentOutputColor     => GetFragmentOutputColorVariableName(config, location),
                IoVariable.FragmentOutputDepth     => ("gl_FragDepth", Float),
                IoVariable.FragmentOutputIsBgra    => (DefaultNames.SupportBlockIsBgraName, BoolArray),
                IoVariable.FrontColorDiffuse       => ("gl_FrontColor", Vec4F), // Deprecated.
                IoVariable.FrontColorSpecular      => ("gl_FrontSecondaryColor", Vec4F), // Deprecated.
                IoVariable.FrontFacing             => ("gl_FrontFacing", Boolean),
                IoVariable.InstanceId              => ("gl_InstanceID", Int),
                IoVariable.InstanceIndex           => ("gl_InstanceIndex", Int),
                IoVariable.InvocationId            => ("gl_InvocationID", Int),
                IoVariable.Layer                   => ("gl_Layer", Int),
                IoVariable.PatchVertices           => ("gl_PatchVerticesIn", Int),
                IoVariable.PointCoord              => ("gl_PointCoord", Vec2F),
                IoVariable.PointSize               => ("gl_PointSize", Float),
                IoVariable.Position                => ("gl_Position", Vec4F),
                IoVariable.PrimitiveId             => GetPrimitiveIdVariableName(config.Stage, isOutput),
                IoVariable.SubgroupEqMask          => GetSubgroupMaskVariableName(config, "Eq"),
                IoVariable.SubgroupGeMask          => GetSubgroupMaskVariableName(config, "Ge"),
                IoVariable.SubgroupGtMask          => GetSubgroupMaskVariableName(config, "Gt"),
                IoVariable.SubgroupLaneId          => GetSubgroupInvocationIdVariableName(config),
                IoVariable.SubgroupLeMask          => GetSubgroupMaskVariableName(config, "Le"),
                IoVariable.SubgroupLtMask          => GetSubgroupMaskVariableName(config, "Lt"),
                IoVariable.SupportBlockRenderScale => (DefaultNames.SupportBlockRenderScaleName, FloatArray),
                IoVariable.SupportBlockViewInverse => (DefaultNames.SupportBlockViewportInverse, Vec2F),
                IoVariable.TessellationCoord       => ("gl_TessCoord", Vec3F),
                IoVariable.TessellationLevelInner  => ("gl_TessLevelInner", FloatArray),
                IoVariable.TessellationLevelOuter  => ("gl_TessLevelOuter", FloatArray),
                IoVariable.TextureCoord            => ("gl_TexCoord", Vec4FArray), // Deprecated.
                IoVariable.ThreadId                => ("gl_LocalInvocationID", Vec3U),
                IoVariable.ThreadKill              => ("gl_HelperInvocation", Boolean),
                IoVariable.UserDefined             => GetUserDefinedVariableName(config, location, component, isOutput, isPerPatch),
                IoVariable.VertexId                => ("gl_VertexID", Int),
                IoVariable.VertexIndex             => ("gl_VertexIndex", Int),
                IoVariable.ViewportIndex           => ("gl_ViewportIndex", Int),
                IoVariable.ViewportMask            => ("gl_ViewportMask", IntArray),
                _                                  => (null, AggregateType.Invalid)
            };
        }

        /// <summary>
        /// Tells whether the variable is one of the listed built-ins and the stage/direction
        /// combination matches: outputs only on tessellation control; inputs on tessellation
        /// control, tessellation evaluation and geometry.
        /// </summary>
        public static bool IsPerVertexBuiltIn(ShaderStage stage, IoVariable ioVariable, bool isOutput)
        {
            bool isCandidate =
                ioVariable == IoVariable.Layer ||
                ioVariable == IoVariable.ViewportIndex ||
                ioVariable == IoVariable.PointSize ||
                ioVariable == IoVariable.Position ||
                ioVariable == IoVariable.ClipDistance ||
                ioVariable == IoVariable.PointCoord ||
                ioVariable == IoVariable.ViewportMask;

            if (!isCandidate)
            {
                return false;
            }

            if (isOutput)
            {
                return stage == ShaderStage.TessellationControl;
            }

            return stage == ShaderStage.TessellationControl ||
                   stage == ShaderStage.TessellationEvaluation ||
                   stage == ShaderStage.Geometry;
        }

        // Fragment color output: a negative location yields the bare prefix with the type of
        // location 0; otherwise the location is appended to the prefix.
        private static (string, AggregateType) GetFragmentOutputColorVariableName(ShaderConfig config, int location)
        {
            if (location >= 0)
            {
                string indexedName = DefaultNames.OAttributePrefix + location.ToString(CultureInfo.InvariantCulture);

                return (indexedName, config.GetFragmentOutputColorType(location));
            }

            return (DefaultNames.OAttributePrefix, config.GetFragmentOutputColorType(0));
        }

        private static (string, AggregateType) GetPrimitiveIdVariableName(ShaderStage stage, bool isOutput)
        {
            // The geometry stage has an additional gl_PrimitiveIDIn variable.
            bool useGeometryInput = !isOutput && stage == ShaderStage.Geometry;

            string name = useGeometryInput ? "gl_PrimitiveIDIn" : "gl_PrimitiveID";

            return (name, AggregateType.S32);
        }

        // Subgroup mask: the ARB variable (unpacked to a 2-component vector) when the host
        // reports shader ballot support, otherwise the 4-component gl_Subgroup*Mask variable.
        private static (string, AggregateType) GetSubgroupMaskVariableName(ShaderConfig config, string cc)
        {
            if (config.GpuAccessor.QueryHostSupportsShaderBallot())
            {
                return ($"unpackUint2x32(gl_SubGroup{cc}MaskARB)", AggregateType.Vector2 | AggregateType.U32);
            }

            return ($"gl_Subgroup{cc}Mask", AggregateType.Vector4 | AggregateType.U32);
        }

        private static (string, AggregateType) GetSubgroupInvocationIdVariableName(ShaderConfig config)
        {
            if (config.GpuAccessor.QueryHostSupportsShaderBallot())
            {
                return ("gl_SubGroupInvocationARB", AggregateType.U32);
            }

            return ("gl_SubgroupInvocationID", AggregateType.U32);
        }

        // User defined attribute: prefix chosen by per-patch/output/input, then the location,
        // then an optional "_<component>" suffix when the config reports a per-component variable.
        private static (string, AggregateType) GetUserDefinedVariableName(ShaderConfig config, int location, int component, bool isOutput, bool isPerPatch)
        {
            string name;

            if (isPerPatch)
            {
                name = DefaultNames.PerPatchAttributePrefix;
            }
            else if (isOutput)
            {
                name = DefaultNames.OAttributePrefix;
            }
            else
            {
                name = DefaultNames.IAttributePrefix;
            }

            if (location < 0)
            {
                return (name, config.GetUserDefinedType(0, isOutput));
            }

            name += location.ToString(CultureInfo.InvariantCulture);

            bool hasComponentVariable = config.HasPerLocationInputOrOutputComponent(
                IoVariable.UserDefined,
                location,
                component,
                isOutput);

            if (hasComponentVariable)
            {
                char componentName = "xyzw"[component & 3];

                name += "_" + componentName;
            }

            return (name, config.GetUserDefinedType(location, isOutput));
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/NumberFormatter.cs
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Globalization;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Formats 32-bit constant values as literal text for the generated GLSL.
    /// </summary>
    static class NumberFormatter
    {
        // Integers whose magnitude is at most this value are written in decimal; larger ones in hex.
        private const int MaxDecimal = 256;

        // A formatted float containing any of these needs no ".0" suffix.
        private static readonly char[] _floatMarkers = { '.', 'e', 'E' };

        /// <summary>
        /// Formats the raw 32-bit <paramref name="value"/> as a literal of <paramref name="dstType"/>.
        /// Returns false only for FP32 values that are NaN or infinite.
        /// </summary>
        /// <exception cref="ArgumentException">The type is not FP32, S32, U32 or Bool.</exception>
        public static bool TryFormat(int value, AggregateType dstType, out string formatted)
        {
            switch (dstType)
            {
                case AggregateType.FP32:
                    return TryFormatFloat(BitConverter.Int32BitsToSingle(value), out formatted);

                case AggregateType.S32:
                    formatted = FormatInt(value);
                    return true;

                case AggregateType.U32:
                    formatted = FormatUint((uint)value);
                    return true;

                case AggregateType.Bool:
                    formatted = value != 0 ? "true" : "false";
                    return true;

                default:
                    throw new ArgumentException($"Invalid variable type \"{dstType}\".");
            }
        }

        /// <summary>Formats a float, throwing when <see cref="TryFormatFloat"/> fails.</summary>
        /// <exception cref="ArgumentException">The value is NaN or infinite.</exception>
        public static string FormatFloat(float value)
        {
            if (TryFormatFloat(value, out string text))
            {
                return text;
            }

            throw new ArgumentException("Failed to convert float value to string.");
        }

        /// <summary>
        /// Formats a finite float with the "G9" format and the invariant culture, appending ".0"
        /// when the text has neither a decimal point nor an exponent. NaN and infinities yield
        /// false with a null result.
        /// </summary>
        public static bool TryFormatFloat(float value, out string formatted)
        {
            if (!float.IsFinite(value))
            {
                formatted = null;

                return false;
            }

            string text = value.ToString("G9", CultureInfo.InvariantCulture);

            if (text.IndexOfAny(_floatMarkers) < 0)
            {
                text += ".0";
            }

            formatted = text;

            return true;
        }

        /// <summary>Formats an integer as a signed (S32) or unsigned (U32) literal.</summary>
        /// <exception cref="ArgumentException">The type is neither S32 nor U32.</exception>
        public static string FormatInt(int value, AggregateType dstType)
        {
            switch (dstType)
            {
                case AggregateType.S32:
                    return FormatInt(value);

                case AggregateType.U32:
                    return FormatUint((uint)value);

                default:
                    throw new ArgumentException($"Invalid variable type \"{dstType}\".");
            }
        }

        /// <summary>Formats a signed integer: decimal within [-256, 256], "0x" hex otherwise.</summary>
        public static string FormatInt(int value)
        {
            if (value < -MaxDecimal || value > MaxDecimal)
            {
                return string.Concat("0x", value.ToString("X", CultureInfo.InvariantCulture));
            }

            return value.ToString(CultureInfo.InvariantCulture);
        }

        /// <summary>Formats an unsigned integer with a "u" suffix: decimal up to 256, "0x" hex otherwise.</summary>
        public static string FormatUint(uint value)
        {
            if (value > MaxDecimal)
            {
                return string.Concat("0x", value.ToString("X", CultureInfo.InvariantCulture), "u");
            }

            return string.Concat(value.ToString(CultureInfo.InvariantCulture), "u");
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs
using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;

using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Tracks declared local variables and builds the GLSL names/expressions for operands,
    /// constant buffers, samplers and images.
    /// </summary>
    class OperandManager
    {
        // Name prefix per shader stage, indexed by the integer value of ShaderStage.
        private static readonly string[] _stagePrefixes = { "cp", "vp", "tcp", "tep", "gp", "fp" };

        // Generated name of every local declared through DeclareLocal.
        private readonly Dictionary<AstOperand, string> _locals;

        public OperandManager()
        {
            _locals = new Dictionary<AstOperand, string>();
        }

        /// <summary>
        /// Registers a local and returns its generated name, which is the local prefix followed
        /// by "_" and the number of locals declared before it.
        /// </summary>
        public string DeclareLocal(AstOperand operand)
        {
            string localName = $"{DefaultNames.LocalNamePrefix}_{_locals.Count}";

            _locals.Add(operand, localName);

            return localName;
        }

        /// <summary>Returns the GLSL expression text for an operand.</summary>
        /// <exception cref="ArgumentException">The operand type is not handled.</exception>
        public string GetExpression(CodeGenContext context, AstOperand operand)
        {
            switch (operand.Type)
            {
                case OperandType.Argument:
                    return GetArgumentName(operand.Value);

                case OperandType.Constant:
                    return NumberFormatter.FormatInt(operand.Value);

                case OperandType.ConstantBuffer:
                    return GetConstantBufferName(operand, context.Config);

                case OperandType.LocalVariable:
                    return _locals[operand];

                case OperandType.Undefined:
                    return DefaultNames.UndefinedName;

                default:
                    throw new ArgumentException($"Invalid operand type \"{operand.Type}\".");
            }
        }

        private static string GetConstantBufferName(AstOperand operand, ShaderConfig config)
        {
            bool cbIndexable = config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing);

            return GetConstantBufferName(operand.CbufSlot, operand.CbufOffset, config.Stage, cbIndexable);
        }

        /// <summary>
        /// Constant buffer access with a constant offset: the offset divided by 4 indexes the
        /// vec4 array, the low two bits select the component.
        /// </summary>
        public static string GetConstantBufferName(int slot, int offset, ShaderStage stage, bool cbIndexable)
        {
            string bufferName = GetUbName(stage, slot, cbIndexable);

            return $"{bufferName}[{offset >> 2}].{GetSwizzleMask(offset & 3)}";
        }

        // Selects one component of a vec4 expression: either by subscript, or (when subscripting
        // is not requested) through a chain of conditional expressions comparing the index to 1..3.
        private static string GetVec4Indexed(string vectorName, string indexExpr, bool indexElement)
        {
            if (indexElement)
            {
                return $"{vectorName}[{indexExpr}]";
            }

            string selection = vectorName + ".x";

            for (int lane = 1; lane <= 3; lane++)
            {
                selection = $"(({indexExpr}) == {lane}) ? ({vectorName}.{GetSwizzleMask(lane)}) : ({selection})";
            }

            return "(" + selection + ")";
        }

        /// <summary>Constant buffer access with a constant slot and an offset expression.</summary>
        public static string GetConstantBufferName(int slot, string offsetExpr, ShaderStage stage, bool cbIndexable, bool indexElement)
        {
            string vectorExpr = GetUbName(stage, slot, cbIndexable) + $"[{offsetExpr} >> 2]";
            string componentExpr = offsetExpr + " & 3";

            return GetVec4Indexed(vectorExpr, componentExpr, indexElement);
        }

        /// <summary>Constant buffer access with both slot and offset given as expressions.</summary>
        public static string GetConstantBufferName(string slotExpr, string offsetExpr, ShaderStage stage, bool indexElement)
        {
            string vectorExpr = GetUbName(stage, slotExpr) + $"[{offsetExpr} >> 2]";
            string componentExpr = offsetExpr + " & 3";

            return GetVec4Indexed(vectorExpr, componentExpr, indexElement);
        }

        /// <summary>
        /// Uniform buffer name for a constant slot: the array form when <paramref name="cbIndexable"/>
        /// is set, otherwise a per-slot name.
        /// </summary>
        public static string GetUbName(ShaderStage stage, int slot, bool cbIndexable)
        {
            if (!cbIndexable)
            {
                return $"{GetShaderStagePrefix(stage)}_{DefaultNames.UniformNamePrefix}{slot}_{DefaultNames.UniformNameSuffix}";
            }

            return GetUbName(stage, NumberFormatter.FormatInt(slot, AggregateType.S32));
        }

        private static string GetUbName(ShaderStage stage, string slotExpr)
        {
            return $"{GetShaderStagePrefix(stage)}_{DefaultNames.UniformNamePrefix}[{slotExpr}].{DefaultNames.DataName}";
        }

        public static string GetSamplerName(ShaderStage stage, AstTextureOperation texOp, string indexExpr)
        {
            bool indexed = texOp.Type.HasFlag(SamplerType.Indexed);

            return GetSamplerName(stage, texOp.CbufSlot, texOp.Handle, indexed, indexExpr);
        }

        /// <summary>
        /// Sampler name: stage prefix, sampler prefix, then a suffix built from the constant
        /// buffer slot (or "tcb" when the slot is negative) and the handle in hex; indexed
        /// samplers get an extra "a[index]" subscript.
        /// </summary>
        public static string GetSamplerName(ShaderStage stage, int cbufSlot, int handle, bool indexed, string indexExpr)
        {
            string suffix;

            if (cbufSlot < 0)
            {
                suffix = $"_tcb_{handle:X}";
            }
            else
            {
                suffix = $"_cb{cbufSlot}_{handle:X}";
            }

            if (indexed)
            {
                suffix += $"a[{indexExpr}]";
            }

            return GetShaderStagePrefix(stage) + "_" + DefaultNames.SamplerNamePrefix + suffix;
        }

        public static string GetImageName(ShaderStage stage, AstTextureOperation texOp, string indexExpr)
        {
            bool indexed = texOp.Type.HasFlag(SamplerType.Indexed);

            return GetImageName(stage, texOp.CbufSlot, texOp.Handle, texOp.Format, indexed, indexExpr);
        }

        /// <summary>
        /// Image name: same layout as the sampler name, with the GLSL format name appended
        /// to the suffix and the image prefix in place of the sampler prefix.
        /// </summary>
        public static string GetImageName(
            ShaderStage stage,
            int cbufSlot,
            int handle,
            TextureFormat format,
            bool indexed,
            string indexExpr)
        {
            var formatName = format.ToGlslFormat();

            string suffix;

            if (cbufSlot < 0)
            {
                suffix = $"_tcb_{handle:X}_{formatName}";
            }
            else
            {
                suffix = $"_cb{cbufSlot}_{handle:X}_{formatName}";
            }

            if (indexed)
            {
                suffix += $"a[{indexExpr}]";
            }

            return GetShaderStagePrefix(stage) + "_" + DefaultNames.ImageNamePrefix + suffix;
        }

        /// <summary>Returns the stage prefix, or "invalid" when the stage value is outside the prefix table.</summary>
        public static string GetShaderStagePrefix(ShaderStage stage)
        {
            int stageIndex = (int)stage;

            bool inRange = stageIndex >= 0 && stageIndex < _stagePrefixes.Length;

            return inRange ? _stagePrefixes[stageIndex] : "invalid";
        }

        private static char GetSwizzleMask(int value)
        {
            return "xyzw"[value];
        }

        public static string GetArgumentName(int argIndex)
        {
            return $"{DefaultNames.ArgumentNamePrefix}{argIndex}";
        }

        /// <summary>Returns the type produced by an AST node (operation result or operand).</summary>
        /// <exception cref="ArgumentException">The node is neither an operation nor an operand.</exception>
        public static AggregateType GetNodeDestType(CodeGenContext context, IAstNode node)
        {
            // TODO: Get rid of that function entirely and return the type from the operation generation
            // functions directly, like SPIR-V does.

            if (node is AstOperation operation)
            {
                return GetOperationDestType(context, operation);
            }

            if (node is AstOperand operand)
            {
                if (operand.Type == OperandType.Argument)
                {
                    return context.CurrentFunction.GetArgumentType(operand.Value);
                }

                return OperandInfo.GetVarType(operand);
            }

            string typeName = node?.GetType().Name ?? "null";

            throw new ArgumentException($"Invalid node type \"{typeName}\".");
        }

        // Result type of an operation. Load (from input/output storage), Call and VectorExtract
        // are resolved first; texture operations are only considered for other instructions;
        // everything else uses the instruction's default destination type.
        private static AggregateType GetOperationDestType(CodeGenContext context, AstOperation operation)
        {
            switch (operation.Inst)
            {
                case Instruction.Load:
                    switch (operation.StorageKind)
                    {
                        case StorageKind.Input:
                        case StorageKind.InputPerPatch:
                        case StorageKind.Output:
                        case StorageKind.OutputPerPatch:
                            return GetIoLoadDestType(context, operation);
                    }

                    break;

                case Instruction.Call:
                    AstOperand funcId = (AstOperand)operation.GetSource(0);

                    Debug.Assert(funcId.Type == OperandType.Constant);

                    return context.GetFunction(funcId.Value).ReturnType;

                case Instruction.VectorExtract:
                    return GetNodeDestType(context, operation.GetSource(0)) & ~AggregateType.ElementCountMask;

                default:
                    if (operation is AstTextureOperation texOp)
                    {
                        Instruction texInst = texOp.Inst;

                        if (texInst == Instruction.ImageLoad ||
                            texInst == Instruction.ImageStore ||
                            texInst == Instruction.ImageAtomic)
                        {
                            return texOp.GetVectorType(texOp.Format.GetComponentType());
                        }

                        if (texInst == Instruction.TextureSample)
                        {
                            return texOp.GetVectorType(GetDestVarType(operation.Inst));
                        }
                    }

                    break;
            }

            return GetDestVarType(operation.Inst);
        }

        // Element type of a load from input/output storage. The first source must be a constant
        // holding the IoVariable; for per-location variables the second source must be a constant
        // location, and a constant third source may select a per-component variable.
        private static AggregateType GetIoLoadDestType(CodeGenContext context, AstOperation operation)
        {
            if (operation.GetSource(0) is not AstOperand { Type: OperandType.Constant } varId)
            {
                throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
            }

            IoVariable ioVariable = (IoVariable)varId.Value;
            StorageKind storageKind = operation.StorageKind;

            bool isOutput = storageKind == StorageKind.Output || storageKind == StorageKind.OutputPerPatch;
            bool isPerPatch = storageKind == StorageKind.InputPerPatch || storageKind == StorageKind.OutputPerPatch;

            int location = 0;
            int component = 0;

            if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
            {
                if (operation.GetSource(1) is not AstOperand { Type: OperandType.Constant } vecIndex)
                {
                    throw new InvalidOperationException($"Second input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
                }

                location = vecIndex.Value;

                if (operation.SourcesCount > 2 &&
                    operation.GetSource(2) is AstOperand { Type: OperandType.Constant } elemIndex &&
                    context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput))
                {
                    component = elemIndex.Value;
                }
            }

            (_, AggregateType varType) = IoMap.GetGlslVariable(context.Config, ioVariable, location, component, isOutput, isPerPatch);

            return varType & AggregateType.ElementTypeMask;
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/CodeGen/Glsl/TypeConversion.cs
using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;

namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
    /// <summary>
    /// Builds GLSL expressions that reinterpret a value of one scalar type as another.
    /// </summary>
    static class TypeConversion
    {
        /// <summary>
        /// Returns the expression for <paramref name="node"/> reinterpreted from
        /// <paramref name="srcType"/> to <paramref name="dstType"/>. Constant operands are
        /// emitted directly as a literal of the destination type whenever the literal can be
        /// formatted; otherwise the node's expression is wrapped in the matching conversion.
        /// </summary>
        /// <exception cref="ArgumentException">No conversion exists between the two types.</exception>
        public static string ReinterpretCast(
            CodeGenContext context,
            IAstNode node,
            AggregateType srcType,
            AggregateType dstType)
        {
            if (node is AstOperand { Type: OperandType.Constant } constant &&
                NumberFormatter.TryFormat(constant.Value, dstType, out string literal))
            {
                return literal;
            }

            string nodeExpr = InstGen.GetExpression(context, node);

            return ReinterpretCast(nodeExpr, node, srcType, dstType);
        }

        // Wraps an already generated expression in the conversion from srcType to dstType.
        // Arm order matters: FP32 sources are matched first, then FP32 destinations, then Bool
        // sources, then Bool / S32 / U32 destinations.
        private static string ReinterpretCast(string expr, IAstNode node, AggregateType srcType, AggregateType dstType)
        {
            if (srcType == dstType)
            {
                return expr;
            }

            ArgumentException Invalid()
            {
                return new ArgumentException($"Invalid reinterpret cast from \"{srcType}\" to \"{dstType}\".");
            }

            return (srcType, dstType) switch
            {
                (AggregateType.FP32, AggregateType.Bool) => $"(floatBitsToInt({expr}) != 0)",
                (AggregateType.FP32, AggregateType.S32) => $"floatBitsToInt({expr})",
                (AggregateType.FP32, AggregateType.U32) => $"floatBitsToUint({expr})",
                (AggregateType.FP32, _) => throw Invalid(),

                (AggregateType.Bool, AggregateType.FP32) => $"intBitsToFloat({ReinterpretBoolToInt(expr, node, AggregateType.S32)})",
                (AggregateType.S32, AggregateType.FP32) => $"intBitsToFloat({expr})",
                (AggregateType.U32, AggregateType.FP32) => $"uintBitsToFloat({expr})",
                (_, AggregateType.FP32) => throw Invalid(),

                (AggregateType.Bool, _) => ReinterpretBoolToInt(expr, node, dstType),

                (_, AggregateType.Bool) => $"({InstGenHelper.Enclose(expr, node, Instruction.CompareNotEqual, isLhs: true)} != 0)",
                (_, AggregateType.S32) => $"int({expr})",
                (_, AggregateType.U32) => $"uint({expr})",

                _ => throw Invalid()
            };
        }

        // Emits "(expr ? <true> : <false>)" using the IR true/false constants formatted
        // as literals of the destination integer type.
        private static string ReinterpretBoolToInt(string expr, IAstNode node, AggregateType dstType)
        {
            string whenTrue = NumberFormatter.FormatInt(IrConsts.True, dstType);
            string whenFalse = NumberFormatter.FormatInt(IrConsts.False, dstType);

            string condition = InstGenHelper.Enclose(expr, node, Instruction.ConditionalSelect, isLhs: false);

            return "(" + condition + " ? " + whenTrue + " : " + whenFalse + ")";
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs new file mode 100644 index 00000000..ed292ef1 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -0,0 +1,409 @@ +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using Spv.Generator; +using System; +using System.Collections.Generic; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using IrConsts = IntermediateRepresentation.IrConsts; + using IrOperandType = IntermediateRepresentation.OperandType; + + partial class CodeGenContext : Module + { + private const uint SpirvVersionMajor = 1; + private const uint SpirvVersionMinor = 3; + private const uint SpirvVersionRevision = 0; + private const uint SpirvVersionPacked = (SpirvVersionMajor << 16) | (SpirvVersionMinor << 8) | SpirvVersionRevision; + + public StructuredProgramInfo Info { get; } + + public ShaderConfig Config { get; } + + public int InputVertices { get; } + + public Dictionary<int, Instruction> UniformBuffers { get; } = new Dictionary<int, Instruction>(); + public Instruction SupportBuffer { get; set; } + public Instruction UniformBuffersArray { get; set; } + public Instruction StorageBuffersArray { get; set; } + public Instruction LocalMemory { get; set; } + public Instruction SharedMemory { get; set; } + public Dictionary<TextureMeta, SamplerType> SamplersTypes { get; } = new Dictionary<TextureMeta, SamplerType>(); + public Dictionary<TextureMeta, (Instruction, Instruction, Instruction)> Samplers { get; } = new Dictionary<TextureMeta, (Instruction, Instruction, Instruction)>(); + public Dictionary<TextureMeta, (Instruction, Instruction)> Images { get; } = new Dictionary<TextureMeta, (Instruction, Instruction)>(); + public Dictionary<IoDefinition, Instruction> Inputs { get; } = new Dictionary<IoDefinition, Instruction>(); + public 
Dictionary<IoDefinition, Instruction> Outputs { get; } = new Dictionary<IoDefinition, Instruction>(); + public Dictionary<IoDefinition, Instruction> InputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>(); + public Dictionary<IoDefinition, Instruction> OutputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>(); + + public Instruction CoordTemp { get; set; } + private readonly Dictionary<AstOperand, Instruction> _locals = new Dictionary<AstOperand, Instruction>(); + private readonly Dictionary<int, Instruction[]> _localForArgs = new Dictionary<int, Instruction[]>(); + private readonly Dictionary<int, Instruction> _funcArgs = new Dictionary<int, Instruction>(); + private readonly Dictionary<int, (StructuredFunction, Instruction)> _functions = new Dictionary<int, (StructuredFunction, Instruction)>(); + + private class BlockState + { + private int _entryCount; + private readonly List<Instruction> _labels = new List<Instruction>(); + + public Instruction GetNextLabel(CodeGenContext context) + { + return GetLabel(context, _entryCount); + } + + public Instruction GetNextLabelAutoIncrement(CodeGenContext context) + { + return GetLabel(context, _entryCount++); + } + + public Instruction GetLabel(CodeGenContext context, int index) + { + while (index >= _labels.Count) + { + _labels.Add(context.Label()); + } + + return _labels[index]; + } + } + + private readonly Dictionary<AstBlock, BlockState> _labels = new Dictionary<AstBlock, BlockState>(); + + public Dictionary<AstBlock, (Instruction, Instruction)> LoopTargets { get; set; } + + public AstBlock CurrentBlock { get; private set; } + + public SpirvDelegates Delegates { get; } + + public CodeGenContext( + StructuredProgramInfo info, + ShaderConfig config, + GeneratorPool<Instruction> instPool, + GeneratorPool<LiteralInteger> integerPool) : base(SpirvVersionPacked, instPool, integerPool) + { + Info = info; + Config = config; + + if (config.Stage == ShaderStage.Geometry) + { + InputTopology inPrimitive 
= config.GpuAccessor.QueryPrimitiveTopology(); + + InputVertices = inPrimitive switch + { + InputTopology.Points => 1, + InputTopology.Lines => 2, + InputTopology.LinesAdjacency => 2, + InputTopology.Triangles => 3, + InputTopology.TrianglesAdjacency => 3, + _ => throw new InvalidOperationException($"Invalid input topology \"{inPrimitive}\".") + }; + } + + AddCapability(Capability.Shader); + AddCapability(Capability.Float64); + + SetMemoryModel(AddressingModel.Logical, MemoryModel.GLSL450); + + Delegates = new SpirvDelegates(this); + } + + public void StartFunction() + { + _locals.Clear(); + _localForArgs.Clear(); + _funcArgs.Clear(); + } + + public void EnterBlock(AstBlock block) + { + CurrentBlock = block; + AddLabel(GetBlockStateLazy(block).GetNextLabelAutoIncrement(this)); + } + + public Instruction GetFirstLabel(AstBlock block) + { + return GetBlockStateLazy(block).GetLabel(this, 0); + } + + public Instruction GetNextLabel(AstBlock block) + { + return GetBlockStateLazy(block).GetNextLabel(this); + } + + private BlockState GetBlockStateLazy(AstBlock block) + { + if (!_labels.TryGetValue(block, out var blockState)) + { + blockState = new BlockState(); + + _labels.Add(block, blockState); + } + + return blockState; + } + + public Instruction NewBlock() + { + var label = Label(); + Branch(label); + AddLabel(label); + return label; + } + + public Instruction[] GetMainInterface() + { + var mainInterface = new List<Instruction>(); + + mainInterface.AddRange(Inputs.Values); + mainInterface.AddRange(Outputs.Values); + mainInterface.AddRange(InputsPerPatch.Values); + mainInterface.AddRange(OutputsPerPatch.Values); + + return mainInterface.ToArray(); + } + + public void DeclareLocal(AstOperand local, Instruction spvLocal) + { + _locals.Add(local, spvLocal); + } + + public void DeclareLocalForArgs(int funcIndex, Instruction[] spvLocals) + { + _localForArgs.Add(funcIndex, spvLocals); + } + + public void DeclareArgument(int argIndex, Instruction spvLocal) + { + 
_funcArgs.Add(argIndex, spvLocal);
        }

        /// <summary>
        /// Records a function together with its SPIR-V instruction under the given index,
        /// so that <see cref="GetFunction"/> can return the pair later.
        /// </summary>
        /// <param name="funcIndex">Key the pair is stored under</param>
        /// <param name="function">Structured function being registered</param>
        /// <param name="spvFunc">SPIR-V instruction stored alongside the function</param>
        public void DeclareFunction(int funcIndex, StructuredFunction function, Instruction spvFunc)
            => _functions.Add(funcIndex, (function, spvFunc));

        /// <summary>Shorthand for <see cref="Get"/> with <see cref="AggregateType.FP32"/>.</summary>
        public Instruction GetFP32(IAstNode node) => Get(AggregateType.FP32, node);

        /// <summary>Shorthand for <see cref="Get"/> with <see cref="AggregateType.FP64"/>.</summary>
        public Instruction GetFP64(IAstNode node) => Get(AggregateType.FP64, node);

        /// <summary>Shorthand for <see cref="Get"/> with <see cref="AggregateType.S32"/>.</summary>
        public Instruction GetS32(IAstNode node) => Get(AggregateType.S32, node);

        /// <summary>Shorthand for <see cref="Get"/> with <see cref="AggregateType.U32"/>.</summary>
        public Instruction GetU32(IAstNode node) => Get(AggregateType.U32, node);

        /// <summary>
        /// Produces the value of an AST node as the requested type.
        /// Operations are generated through <see cref="Instructions.Generate"/> and converted with
        /// <see cref="BitcastIfNeeded"/>; operands are dispatched on their operand type.
        /// </summary>
        /// <param name="type">Type the caller wants the value as</param>
        /// <param name="node">Operation or operand to evaluate</param>
        /// <returns>Instruction yielding the value</returns>
        /// <exception cref="ArgumentException">The operand type is not one of the handled kinds</exception>
        /// <exception cref="NotImplementedException">The node is neither an operation nor an operand</exception>
        public Instruction Get(AggregateType type, IAstNode node)
        {
            if (node is AstOperation operation)
            {
                var generated = Instructions.Generate(this, operation);

                return BitcastIfNeeded(type, generated.Type, generated.Value);
            }

            if (!(node is AstOperand operand))
            {
                throw new NotImplementedException(node.GetType().Name);
            }

            switch (operand.Type)
            {
                case IrOperandType.Argument:
                    return GetArgument(type, operand);
                case IrOperandType.Constant:
                    return GetConstant(type, operand);
                case IrOperandType.ConstantBuffer:
                    return GetConstantBuffer(type, operand);
                case IrOperandType.LocalVariable:
                    return GetLocal(type, operand);
                case IrOperandType.Undefined:
                    return GetUndefined(type);
                default:
                    throw new ArgumentException($"Invalid operand type \"{operand.Type}\".");
            }
        }

        /// <summary>
        /// Produces the value of an AST node in its own type, reporting that type to the caller.
        /// Only operations and local variable operands are handled.
        /// </summary>
        /// <param name="node">Operation or local variable operand to evaluate</param>
        /// <param name="type">Receives the type of the returned value</param>
        /// <returns>Instruction yielding the value</returns>
        /// <exception cref="ArgumentException">The operand is not a local variable</exception>
        /// <exception cref="NotImplementedException">The node is neither an operation nor an operand</exception>
        public Instruction GetWithType(IAstNode node, out AggregateType type)
        {
            if (node is AstOperation operation)
            {
                var generated = Instructions.Generate(this, operation);

                type = generated.Type;

                return generated.Value;
            }

            if (!(node is AstOperand operand))
            {
                throw new NotImplementedException(node.GetType().Name);
            }

            if (operand.Type != IrOperandType.LocalVariable)
            {
                throw new ArgumentException($"Invalid operand type \"{operand.Type}\".");
            }

            type = operand.VarType;

            return GetLocal(type, operand);
        }

        /// <summary>
        /// Returns the placeholder used for undefined operands: false for booleans,
        /// and a zero constant of the requested type otherwise.
        /// </summary>
        private Instruction GetUndefined(AggregateType type)
        {
            switch (type)
            {
                case AggregateType.Bool:
                    return ConstantFalse(TypeBool());
                case AggregateType.FP32:
                    return Constant(TypeFP32(), 0f);
                case AggregateType.FP64:
                    return Constant(TypeFP64(), 0d);
                default:
                    return Constant(GetType(type), 0);
            }
        }

        /// <summary>
        /// Creates a constant of the requested type from the 32-bit value of a constant operand.
        /// </summary>
        /// <param name="type">Type of the constant to create</param>
        /// <param name="operand">Operand whose <c>Value</c> holds the raw 32-bit value</param>
        /// <returns>Constant instruction</returns>
        /// <exception cref="ArgumentException">The type is not Bool, FP32, FP64, S32 or U32</exception>
        public Instruction GetConstant(AggregateType type, AstOperand operand)
        {
            switch (type)
            {
                case AggregateType.Bool:
                    // Any non-zero value is treated as true.
                    return operand.Value != 0 ? ConstantTrue(TypeBool()) : ConstantFalse(TypeBool());
                case AggregateType.FP32:
                    return Constant(TypeFP32(), BitConverter.Int32BitsToSingle(operand.Value));
                case AggregateType.FP64:
                    // The bits are reinterpreted as a 32-bit float first, then widened to double.
                    return Constant(TypeFP64(), (double)BitConverter.Int32BitsToSingle(operand.Value));
                case AggregateType.S32:
                    return Constant(TypeS32(), operand.Value);
                case AggregateType.U32:
                    return Constant(TypeU32(), (uint)operand.Value);
                default:
                    throw new ArgumentException($"Invalid type \"{type}\".");
            }
        }

        /// <summary>
        /// Loads the FP32 element addressed by a constant buffer operand and converts it to the requested type.
        /// The access chain uses member 0, then element <c>CbufOffset &gt;&gt; 2</c>, then component <c>CbufOffset &amp; 3</c>;
        /// when <c>UniformBuffersArray</c> is set, the slot is used as an additional leading index.
        /// </summary>
        /// <param name="type">Type the caller wants the value as</param>
        /// <param name="operand">Constant buffer operand (slot and offset)</param>
        /// <returns>Instruction yielding the loaded value</returns>
        public Instruction GetConstantBuffer(AggregateType type, AstOperand operand)
        {
            var memberIndex = Constant(TypeS32(), 0);
            var elementIndex = Constant(TypeS32(), operand.CbufOffset >> 2);
            var componentIndex = Constant(TypeU32(), operand.CbufOffset & 3);

            Instruction pointer;

            if (UniformBuffersArray != null)
            {
                // Single array variable holding every buffer: index it by slot first.
                var buffers = UniformBuffersArray;
                var slotIndex = Constant(TypeS32(), operand.CbufSlot);

                pointer = AccessChain(
                    TypePointer(StorageClass.Uniform, TypeFP32()),
                    buffers,
                    slotIndex,
                    memberIndex,
                    elementIndex,
                    componentIndex);
            }
            else
            {
                // One variable per slot.
                var buffer = UniformBuffers[operand.CbufSlot];

                pointer = AccessChain(
                    TypePointer(StorageClass.Uniform, TypeFP32()),
                    buffer,
                    memberIndex,
                    elementIndex,
                    componentIndex);
            }

            return BitcastIfNeeded(type, AggregateType.FP32, Load(TypeFP32(), pointer));
        }

        /// <summary>Returns the pointer registered for a local variable operand.</summary>
        public Instruction GetLocalPointer(AstOperand local) => _locals[local];

        /// <summary>Returns the pointers registered for the argument locals of a function.</summary>
        public Instruction[] GetLocalForArgsPointers(int funcIndex) => _localForArgs[funcIndex];

        /// <summary>Returns the pointer registered for a function argument, keyed by the operand value.</summary>
        public Instruction GetArgumentPointer(AstOperand funcArg) => _funcArgs[funcArg.Value];

        /// <summary>
        /// Loads a local variable in its declared type and converts it to <paramref name="dstType"/>.
        /// </summary>
        public Instruction GetLocal(AggregateType dstType, AstOperand local)
        {
            AggregateType declaredType = local.VarType;

            var spvType = GetType(declaredType);
            var pointer = GetLocalPointer(local);
            var loaded = Load(spvType, pointer);

            return BitcastIfNeeded(dstType, declaredType, loaded);
        }

        /// <summary>
        /// Loads a function argument in its declared type and converts it to <paramref name="dstType"/>.
        /// </summary>
        public Instruction GetArgument(AggregateType dstType, AstOperand funcArg)
        {
            AggregateType declaredType = funcArg.VarType;

            var spvType = GetType(declaredType);
            var pointer = GetArgumentPointer(funcArg);
            var loaded = Load(spvType, pointer);

            return BitcastIfNeeded(dstType, declaredType, loaded);
        }

        /// <summary>Returns the function and SPIR-V instruction registered under the given index.</summary>
        public (StructuredFunction, Instruction) GetFunction(int funcIndex) => _functions[funcIndex];

        /// <summary>
        /// Maps an aggregate type to a SPIR-V type instruction.
        /// The array flag is peeled first, then the element count (vector), then the scalar type.
        /// </summary>
        /// <param name="type">Aggregate type to map</param>
        /// <param name="length">Array length, only used when the array flag is set</param>
        /// <returns>Type instruction</returns>
        /// <exception cref="ArgumentException">The scalar type is not one of the supported types</exception>
        public Instruction GetType(AggregateType type, int length = 1)
        {
            if ((type & AggregateType.Array) != 0)
            {
                var elementType = GetType(type & ~AggregateType.Array);
                var lengthConstant = Constant(TypeU32(), length);

                return TypeArray(elementType, lengthConstant);
            }

            AggregateType elementCount = type & AggregateType.ElementCountMask;

            if (elementCount != 0)
            {
                int vectorLength;

                switch (elementCount)
                {
                    case AggregateType.Vector2:
                        vectorLength = 2;
                        break;
                    case AggregateType.Vector3:
                        vectorLength = 3;
                        break;
                    case AggregateType.Vector4:
                        vectorLength = 4;
                        break;
                    default:
                        vectorLength = 1;
                        break;
                }

                return TypeVector(GetType(type & ~AggregateType.ElementCountMask), vectorLength);
            }

            switch (type)
            {
                case AggregateType.Void:
                    return TypeVoid();
                case AggregateType.Bool:
                    return TypeBool();
                case AggregateType.FP32:
                    return TypeFP32();
                case AggregateType.FP64:
                    return TypeFP64();
                case AggregateType.S32:
                    return TypeS32();
                case AggregateType.U32:
                    return TypeU32();
                default:
                    throw new ArgumentException($"Invalid attribute type \"{type}\".");
            }
        }

        /// <summary>
        /// Converts a value from <paramref name="srcType"/> to <paramref name="dstType"/>.
        /// Identical types return the value untouched. Conversions to and from Bool go through S32
        /// (compare against zero, or select between <c>IrConsts.True</c> and <c>IrConsts.False</c>);
        /// every other pair is a plain bitcast.
        /// </summary>
        public Instruction BitcastIfNeeded(AggregateType dstType, AggregateType srcType, Instruction value)
        {
            if (dstType == srcType)
            {
                return value;
            }

            if (dstType == AggregateType.Bool)
            {
                var boolType = TypeBool();
                var asInteger = BitcastIfNeeded(AggregateType.S32, srcType, value);
                var zero = Constant(TypeS32(), 0);

                return INotEqual(boolType, asInteger, zero);
            }

            if (srcType == AggregateType.Bool)
            {
                var trueValue = Constant(TypeS32(), IrConsts.True);
                var falseValue = Constant(TypeS32(), IrConsts.False);
                var asInteger = Select(TypeS32(), value, trueValue, falseValue);

                return BitcastIfNeeded(dstType, AggregateType.S32, asInteger);
            }

            return Bitcast(GetType(dstType, 1), value);
        }

        /// <summary>Returns the 32-bit signed integer type.</summary>
        public Instruction TypeS32() => TypeInt(32, true);

        public Instruction TypeU32()
        {
            return TypeInt(32,
false);
        }

        /// <summary>Returns the 32-bit floating point type.</summary>
        public Instruction TypeFP32() => TypeFloat(32);

        /// <summary>Returns the 64-bit floating point type.</summary>
        public Instruction TypeFP64() => TypeFloat(64);
    }
}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs
new file mode 100644
index 00000000..821da477
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs
@@ -0,0 +1,615 @@
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using Spv.Generator;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using static Spv.Specification;
using SpvInstruction = Spv.Generator.Instruction;

namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
    /// <summary>
    /// Emits the SPIR-V declarations (parameters, locals, memory, buffers, textures, images,
    /// inputs and outputs) of a shader into a <see cref="CodeGenContext"/>.
    /// </summary>
    static class Declarations
    {
        // Variable name prefixes, indexed by the integer value of ShaderStage (see GetStagePrefix).
        private static readonly string[] StagePrefixes = { "cp", "vp", "tcp", "tep", "gp", "fp" };

        /// <summary>
        /// Declares the parameters of a function: input arguments first, output arguments right after them.
        /// </summary>
        public static void DeclareParameters(CodeGenContext context, StructuredFunction function)
        {
            DeclareParameters(context, function.InArguments, 0);
            DeclareParameters(context, function.OutArguments, function.InArguments.Length);
        }

        /// <summary>
        /// Declares one function-storage pointer parameter per type, registering them on the context
        /// under consecutive indices starting at <paramref name="argIndex"/>.
        /// </summary>
        private static void DeclareParameters(CodeGenContext context, IEnumerable<AggregateType> argTypes, int argIndex)
        {
            int nextIndex = argIndex;

            foreach (AggregateType argType in argTypes)
            {
                var valueType = context.GetType(argType);
                var pointerType = context.TypePointer(StorageClass.Function, valueType);
                var parameter = context.FunctionParameter(pointerType);

                context.DeclareArgument(nextIndex, parameter);
                nextIndex++;
            }
        }

        /// <summary>
        /// Declares a function-storage variable for every local of the function,
        /// plus the two component integer vector temporary stored in <c>context.CoordTemp</c>.
        /// </summary>
        public static void DeclareLocals(CodeGenContext context, StructuredFunction function)
        {
            foreach (AstOperand local in function.Locals)
            {
                var valueType = context.GetType(local.VarType);
                var pointerType = context.TypePointer(StorageClass.Function, valueType);
                var variable = context.Variable(pointerType, StorageClass.Function);

                context.AddLocalVariable(variable);
                context.DeclareLocal(local, variable);
            }

            var int2Type = context.TypeVector(context.TypeS32(), 2);
            var int2PointerType = context.TypePointer(StorageClass.Function, int2Type);
            var coordTemp = context.Variable(int2PointerType, StorageClass.Function);

            context.AddLocalVariable(coordTemp);
            context.CoordTemp = coordTemp;
        }

        /// <summary>
        /// For every function in the list, declares one function-storage variable per input argument
        /// and registers the resulting array on the context under the function index.
        /// </summary>
        public static void DeclareLocalForArgs(CodeGenContext context, List<StructuredFunction> functions)
        {
            for (int funcIndex = 0; funcIndex < functions.Count; funcIndex++)
            {
                StructuredFunction function = functions[funcIndex];

                int argCount = function.InArguments.Length;
                var argLocals = new SpvInstruction[argCount];

                for (int argIndex = 0; argIndex < argCount; argIndex++)
                {
                    var argType = function.GetArgumentType(argIndex);
                    var pointerType = context.TypePointer(StorageClass.Function, context.GetType(argType));
                    var variable = context.Variable(pointerType, StorageClass.Function);

                    context.AddLocalVariable(variable);

                    argLocals[argIndex] = variable;
                }

                context.DeclareLocalForArgs(funcIndex, argLocals);
            }
        }

        /// <summary>
        /// Declares every global resource of the shader: local/shared memory, the support buffer,
        /// uniform and storage buffers, samplers, images, and the inputs and outputs.
        /// </summary>
        public static void DeclareAll(CodeGenContext context, StructuredProgramInfo info)
        {
            if (context.Config.Stage == ShaderStage.Compute)
            {
                // Sizes are divided by 4 (rounded up) because the memory arrays are declared with 32-bit elements.
                int localWords = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4);

                if (localWords != 0)
                {
                    DeclareLocalMemory(context, localWords);
                }

                int sharedWords = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4);

                if (sharedWords != 0)
                {
                    DeclareSharedMemory(context, sharedWords);
                }
            }
            else if (context.Config.LocalMemorySize != 0)
            {
                DeclareLocalMemory(context, BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4));
            }

            DeclareSupportBuffer(context);
            DeclareUniformBuffers(context, context.Config.GetConstantBufferDescriptors());
            DeclareStorageBuffers(context, context.Config.GetStorageBufferDescriptors());
            DeclareSamplers(context, context.Config.GetTextureDescriptors());
            DeclareImages(context, context.Config.GetImageDescriptors());
            DeclareInputsAndOutputs(context, info);
        }

        /// <summary>Declares local memory as a private-storage array of <paramref name="size"/> elements.</summary>
        private static void DeclareLocalMemory(CodeGenContext context, int size)
            => context.LocalMemory = DeclareMemory(context, StorageClass.Private, size);

        /// <summary>Declares shared memory as a workgroup-storage array of <paramref name="size"/> elements.</summary>
        private static void DeclareSharedMemory(CodeGenContext context, int size)
            => context.SharedMemory = DeclareMemory(context, StorageClass.Workgroup, size);

        /// <summary>
        /// Declares a global array of <paramref name="size"/> unsigned 32-bit integers in the given storage class.
        /// </summary>
        /// <returns>The declared variable</returns>
        private static SpvInstruction DeclareMemory(CodeGenContext context, StorageClass storage, int size)
        {
            var elementType = context.TypeU32();
            var lengthConstant = context.Constant(context.TypeU32(), size);
            var arrayType = context.TypeArray(elementType, lengthConstant);
            var pointerType = context.TypePointer(storage, arrayType);
            var memory = context.Variable(pointerType, storage);

            context.AddGlobalVariable(memory);

            return memory;
        }

        /// <summary>
        /// Declares the support buffer uniform block (set 0, binding 0) and stores it in <c>context.SupportBuffer</c>.
        /// Nothing is declared unless the stage supports render scale, or the stage is the last of the vertex
        /// pipeline and the viewport transform is reported as disabled.
        /// </summary>
        private static void DeclareSupportBuffer(CodeGenContext context)
        {
            // Evaluation order matters here: the GPU accessor is only queried when the first two checks require it.
            bool isNeeded =
                context.Config.Stage.SupportsRenderScale() ||
                (context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable());

            if (!isNeeded)
            {
                return;
            }

            var isBgraArray = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), SupportBuffer.FragmentIsBgraCount));
            var viewportInverseVector = context.TypeVector(context.TypeFP32(), 4);
            var renderScaleArray = context.TypeArray(context.TypeFP32(), context.Constant(context.TypeU32(), SupportBuffer.RenderScaleMaxCount));

            context.Decorate(isBgraArray, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize);
            context.Decorate(renderScaleArray, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize);

            // Member order: alpha test, is-BGRA flags, viewport inverse, render scale count, render scales.
            var structType = context.TypeStruct(
                false,
                context.TypeU32(),
                isBgraArray,
                viewportInverseVector,
                context.TypeS32(),
                renderScaleArray);

            context.MemberDecorate(structType, 0, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentAlphaTestOffset);
            context.MemberDecorate(structType, 1, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentIsBgraOffset);
            context.MemberDecorate(structType, 2, Decoration.Offset, (LiteralInteger)SupportBuffer.ViewportInverseOffset);
            context.MemberDecorate(structType, 3, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentRenderScaleCountOffset);
            context.MemberDecorate(structType, 4, Decoration.Offset, (LiteralInteger)SupportBuffer.GraphicsRenderScaleOffset);
            context.Decorate(structType, Decoration.Block);

            var pointerType = context.TypePointer(StorageClass.Uniform, structType);
            var supportBuffer = context.Variable(pointerType, StorageClass.Uniform);

            context.Decorate(supportBuffer, Decoration.DescriptorSet, (LiteralInteger)0);
            context.Decorate(supportBuffer, Decoration.Binding, (LiteralInteger)0);

            context.AddGlobalVariable(supportBuffer);

            context.SupportBuffer = supportBuffer;
        }

        /// <summary>
        /// Declares the uniform buffers. Each buffer is a block holding an array of four component FP32 vectors.
        /// With constant buffer indexing in use, a single array variable covering slots 0 to the highest used slot
        /// is declared; otherwise one variable is declared per descriptor.
        /// </summary>
        private static void DeclareUniformBuffers(CodeGenContext context, BufferDescriptor[] descriptors)
        {
            if (descriptors.Length == 0)
            {
                return;
            }

            // Number of 16-byte vectors in one constant buffer.
            uint vectorCount = Constants.ConstantBufferSize / 16;

            var arrayType = context.TypeArray(context.TypeVector(context.TypeFP32(), 4), context.Constant(context.TypeU32(), vectorCount), true);
            context.Decorate(arrayType, Decoration.ArrayStride, (LiteralInteger)16);
            var structType = context.TypeStruct(true, arrayType);
            context.Decorate(structType, Decoration.Block);
            context.MemberDecorate(structType, 0, Decoration.Offset, (LiteralInteger)0);

            if (!context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing))
            {
                var pointerType = context.TypePointer(StorageClass.Uniform, structType);

                foreach (var descriptor in descriptors)
                {
                    var buffer = context.Variable(pointerType, StorageClass.Uniform);

                    context.Name(buffer, $"{GetStagePrefix(context.Config.Stage)}_c{descriptor.Slot}");
                    context.Decorate(buffer, Decoration.DescriptorSet, (LiteralInteger)0);
                    context.Decorate(buffer, Decoration.Binding, (LiteralInteger)descriptor.Binding);
                    context.AddGlobalVariable(buffer);
                    context.UniformBuffers.Add(descriptor.Slot, buffer);
                }

                return;
            }

            int count = descriptors.Max(x => x.Slot) + 1;

            var structArrayType = context.TypeArray(structType, context.Constant(context.TypeU32(), count));
            var arrayPointerType = context.TypePointer(StorageClass.Uniform, structArrayType);
            var buffers = context.Variable(arrayPointerType, StorageClass.Uniform);

            context.Name(buffers, $"{GetStagePrefix(context.Config.Stage)}_u");
            context.Decorate(buffers, Decoration.DescriptorSet, (LiteralInteger)0);
            context.Decorate(buffers, Decoration.Binding, (LiteralInteger)context.Config.FirstConstantBufferBinding);
            context.AddGlobalVariable(buffers);

            context.UniformBuffersArray = buffers;
        }

        /// <summary>
        /// Declares the storage buffers as a single array variable of buffer blocks, each holding a runtime
        /// array of unsigned 32-bit integers. The array covers slots 0 to the highest used slot.
        /// </summary>
        private static void DeclareStorageBuffers(CodeGenContext context, BufferDescriptor[] descriptors)
        {
            if (descriptors.Length == 0)
            {
                return;
            }

            // Storage buffers use descriptor set 1 on Vulkan, set 0 otherwise.
            int descriptorSet = context.Config.Options.TargetApi == TargetApi.Vulkan ? 1 : 0;
            int count = descriptors.Max(x => x.Slot) + 1;

            var arrayType = context.TypeRuntimeArray(context.TypeU32());
            context.Decorate(arrayType, Decoration.ArrayStride, (LiteralInteger)4);
            var structType = context.TypeStruct(true, arrayType);
            context.Decorate(structType, Decoration.BufferBlock);
            context.MemberDecorate(structType, 0, Decoration.Offset, (LiteralInteger)0);
            var structArrayType = context.TypeArray(structType, context.Constant(context.TypeU32(), count));
            var pointerType = context.TypePointer(StorageClass.Uniform, structArrayType);
            var buffers = context.Variable(pointerType, StorageClass.Uniform);

            context.Name(buffers, $"{GetStagePrefix(context.Config.Stage)}_s");
            context.Decorate(buffers, Decoration.DescriptorSet, (LiteralInteger)descriptorSet);
            context.Decorate(buffers, Decoration.Binding, (LiteralInteger)context.Config.FirstStorageBufferBinding);
            context.AddGlobalVariable(buffers);

            context.StorageBuffersArray = buffers;
        }

        /// <summary>
        /// Declares one sampled image variable per distinct texture (constant buffer slot, handle and format),
        /// and records its types and sampler type on the context.
        /// </summary>
        /// <exception cref="InvalidOperationException">A descriptor has an unsupported sampler dimension</exception>
        private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
        {
            foreach (var descriptor in descriptors)
            {
                var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format);

                // Several descriptors may refer to the same texture; declare it only once.
                if (context.Samplers.ContainsKey(meta))
                {
                    continue;
                }

                // Textures use descriptor set 2 on Vulkan, set 0 otherwise.
                int descriptorSet = context.Config.Options.TargetApi == TargetApi.Vulkan ? 2 : 0;

                Dim dim;

                switch (descriptor.Type & SamplerType.Mask)
                {
                    case SamplerType.Texture1D:
                        dim = Dim.Dim1D;
                        break;
                    case SamplerType.Texture2D:
                        dim = Dim.Dim2D;
                        break;
                    case SamplerType.Texture3D:
                        dim = Dim.Dim3D;
                        break;
                    case SamplerType.TextureCube:
                        dim = Dim.Cube;
                        break;
                    case SamplerType.TextureBuffer:
                        dim = Dim.Buffer;
                        break;
                    default:
                        throw new InvalidOperationException($"Invalid sampler type \"{descriptor.Type & SamplerType.Mask}\".");
                }

                var imageType = context.TypeImage(
                    context.TypeFP32(),
                    dim,
                    descriptor.Type.HasFlag(SamplerType.Shadow),
                    descriptor.Type.HasFlag(SamplerType.Array),
                    descriptor.Type.HasFlag(SamplerType.Multisample),
                    1,
                    ImageFormat.Unknown);

                string nameSuffix;

                if (meta.CbufSlot < 0)
                {
                    nameSuffix = $"_tcb_{meta.Handle:X}";
                }
                else
                {
                    nameSuffix = $"_cb{meta.CbufSlot}_{meta.Handle:X}";
                }

                var sampledImageType = context.TypeSampledImage(imageType);
                var pointerType = context.TypePointer(StorageClass.UniformConstant, sampledImageType);
                var sampler = context.Variable(pointerType, StorageClass.UniformConstant);

                context.Samplers.Add(meta, (imageType, sampledImageType, sampler));
                context.SamplersTypes.Add(meta, descriptor.Type);

                context.Name(sampler, $"{GetStagePrefix(context.Config.Stage)}_tex{nameSuffix}");
                context.Decorate(sampler, Decoration.DescriptorSet, (LiteralInteger)descriptorSet);
                context.Decorate(sampler, Decoration.Binding, (LiteralInteger)descriptor.Binding);
                context.AddGlobalVariable(sampler);
            }
        }

        /// <summary>
        /// Declares one read-write image variable per distinct image (constant buffer slot, handle and format),
        /// and records it on the context. Images flagged as coherent get the Coherent decoration.
        /// </summary>
        private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors)
        {
            foreach (var descriptor in descriptors)
            {
                var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format);

                // Several descriptors may refer to the same image; declare it only once.
                if (context.Images.ContainsKey(meta))
                {
                    continue;
                }

                // Images use descriptor set 3 on Vulkan, set 0 otherwise.
                int descriptorSet = context.Config.Options.TargetApi == TargetApi.Vulkan ? 3 : 0;

                var dim = GetDim(descriptor.Type);

                var imageType = context.TypeImage(
                    context.GetType(meta.Format.GetComponentType()),
                    dim,
                    descriptor.Type.HasFlag(SamplerType.Shadow),
                    descriptor.Type.HasFlag(SamplerType.Array),
                    descriptor.Type.HasFlag(SamplerType.Multisample),
                    AccessQualifier.ReadWrite,
                    GetImageFormat(meta.Format));

                string nameSuffix;

                if (meta.CbufSlot < 0)
                {
                    nameSuffix = $"_tcb_{meta.Handle:X}_{meta.Format.ToGlslFormat()}";
                }
                else
                {
                    nameSuffix = $"_cb{meta.CbufSlot}_{meta.Handle:X}_{meta.Format.ToGlslFormat()}";
                }

                var pointerType = context.TypePointer(StorageClass.UniformConstant, imageType);
                var image = context.Variable(pointerType, StorageClass.UniformConstant);

                context.Images.Add(meta, (imageType, image));

                context.Name(image, $"{GetStagePrefix(context.Config.Stage)}_img{nameSuffix}");
                context.Decorate(image, Decoration.DescriptorSet, (LiteralInteger)descriptorSet);
                context.Decorate(image, Decoration.Binding, (LiteralInteger)descriptor.Binding);

                if (descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent))
                {
                    context.Decorate(image, Decoration.Coherent);
                }

                context.AddGlobalVariable(image);
            }
        }

        /// <summary>Maps the dimension part of a sampler type to the SPIR-V image dimension.</summary>
        /// <exception cref="ArgumentException">The dimension is not one of the supported ones</exception>
        private static Dim GetDim(SamplerType type)
        {
            switch (type & SamplerType.Mask)
            {
                case SamplerType.Texture1D:
                    return Dim.Dim1D;
                case SamplerType.Texture2D:
                    return Dim.Dim2D;
                case SamplerType.Texture3D:
                    return Dim.Dim3D;
                case SamplerType.TextureCube:
                    return Dim.Cube;
                case SamplerType.TextureBuffer:
                    return Dim.Buffer;
                default:
                    throw new ArgumentException($"Invalid sampler type \"{type & SamplerType.Mask}\".");
            }
        }

        /// <summary>Maps a texture format to the matching SPIR-V image format.</summary>
        /// <exception cref="ArgumentException">The format has no mapping</exception>
        private static ImageFormat GetImageFormat(TextureFormat format) => format switch
        {
            TextureFormat.Unknown => ImageFormat.Unknown,
            TextureFormat.R8Unorm => ImageFormat.R8,
            TextureFormat.R8Snorm => ImageFormat.R8Snorm,
            TextureFormat.R8Uint => ImageFormat.R8ui,
            TextureFormat.R8Sint => ImageFormat.R8i,
            TextureFormat.R16Float => ImageFormat.R16f,
            TextureFormat.R16Unorm => ImageFormat.R16,
            TextureFormat.R16Snorm => ImageFormat.R16Snorm,
            TextureFormat.R16Uint => ImageFormat.R16ui,
            TextureFormat.R16Sint => ImageFormat.R16i,
            TextureFormat.R32Float => ImageFormat.R32f,
            TextureFormat.R32Uint => ImageFormat.R32ui,
            TextureFormat.R32Sint => ImageFormat.R32i,
            TextureFormat.R8G8Unorm => ImageFormat.Rg8,
            TextureFormat.R8G8Snorm => ImageFormat.Rg8Snorm,
            TextureFormat.R8G8Uint => ImageFormat.Rg8ui,
            TextureFormat.R8G8Sint => ImageFormat.Rg8i,
            TextureFormat.R16G16Float => ImageFormat.Rg16f,
            TextureFormat.R16G16Unorm => ImageFormat.Rg16,
            TextureFormat.R16G16Snorm => ImageFormat.Rg16Snorm,
            TextureFormat.R16G16Uint => ImageFormat.Rg16ui,
            TextureFormat.R16G16Sint => ImageFormat.Rg16i,
            TextureFormat.R32G32Float => ImageFormat.Rg32f,
            TextureFormat.R32G32Uint => ImageFormat.Rg32ui,
            TextureFormat.R32G32Sint => ImageFormat.Rg32i,
            TextureFormat.R8G8B8A8Unorm => ImageFormat.Rgba8,
            TextureFormat.R8G8B8A8Snorm => ImageFormat.Rgba8Snorm,
            TextureFormat.R8G8B8A8Uint => ImageFormat.Rgba8ui,
            TextureFormat.R8G8B8A8Sint => ImageFormat.Rgba8i,
            TextureFormat.R16G16B16A16Float => ImageFormat.Rgba16f,
            TextureFormat.R16G16B16A16Unorm => ImageFormat.Rgba16,
            TextureFormat.R16G16B16A16Snorm => ImageFormat.Rgba16Snorm,
            TextureFormat.R16G16B16A16Uint => ImageFormat.Rgba16ui,
            TextureFormat.R16G16B16A16Sint => ImageFormat.Rgba16i,
            TextureFormat.R32G32B32A32Float => ImageFormat.Rgba32f,
            TextureFormat.R32G32B32A32Uint => ImageFormat.Rgba32ui,
            TextureFormat.R32G32B32A32Sint => ImageFormat.Rgba32i,
            TextureFormat.R10G10B10A2Unorm => ImageFormat.Rgb10A2,
            TextureFormat.R10G10B10A2Uint => ImageFormat.Rgb10a2ui,
            TextureFormat.R11G11B10Float => ImageFormat.R11fG11fB10f,
            _ => throw new ArgumentException($"Invalid texture format \"{format}\".")
        };

        /// <summary>
        /// Declares every input and output listed in the program info, choosing the interpolation
        /// qualifier for fragment shader variables.
        /// </summary>
        private static void DeclareInputsAndOutputs(CodeGenContext context, StructuredProgramInfo info)
        {
            foreach (var ioDefinition in info.IoDefinitions)
            {
                var ioVariable = ioDefinition.IoVariable;

                // Those are actually from constant buffer, rather than being actual inputs or outputs,
                // so we must ignore them here as they are declared as part of the support buffer.
                // TODO: Delete this after we represent this properly on the IR (as a constant buffer rather than "input").
                switch (ioVariable)
                {
                    case IoVariable.FragmentOutputIsBgra:
                    case IoVariable.SupportBlockRenderScale:
                    case IoVariable.SupportBlockViewInverse:
                        continue;
                }

                bool isOutput = ioDefinition.StorageKind.IsOutput();
                bool isPerPatch = ioDefinition.StorageKind.IsPerPatch();

                PixelImap interpolation = PixelImap.Unused;

                if (context.Config.Stage == ShaderStage.Fragment)
                {
                    if (ioVariable == IoVariable.UserDefined)
                    {
                        interpolation = context.Config.ImapTypes[ioDefinition.Location].GetFirstUsedType();
                    }
                    else
                    {
                        // Built-ins with integer elements are marked as constant (flat) interpolation.
                        (_, AggregateType builtInType) = IoMap.GetSpirvBuiltIn(ioVariable);
                        AggregateType elementType = builtInType & AggregateType.ElementTypeMask;

                        if (elementType == AggregateType.S32 || elementType == AggregateType.U32)
                        {
                            interpolation = PixelImap.Constant;
                        }
                    }
                }

                DeclareInputOrOutput(context, ioDefinition, isOutput, isPerPatch, interpolation);
            }
        }

        /// <summary>
        /// Declares a single input or output variable, applies its decorations (built-in, patch, location,
        /// component, passthrough, interpolation, transform feedback) and registers it on the context.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="ioDefinition">Variable to declare</param>
        /// <param name="isOutput">True for an output, false for an input</param>
        /// <param name="isPerPatch">True if the variable is per patch</param>
        /// <param name="iq">Interpolation qualifier, only applied to inputs</param>
        /// <exception cref="InvalidOperationException">The built-in variable has no known type</exception>
        private static void DeclareInputOrOutput(CodeGenContext context, IoDefinition ioDefinition, bool isOutput, bool isPerPatch, PixelImap iq = PixelImap.Unused)
        {
            IoVariable ioVariable = ioDefinition.IoVariable;
            StorageClass storageClass = isOutput ? StorageClass.Output : StorageClass.Input;

            bool isBuiltIn = false;
            BuiltIn builtIn = default;
            AggregateType varType;

            if (ioVariable == IoVariable.UserDefined)
            {
                varType = context.Config.GetUserDefinedType(ioDefinition.Location, isOutput);
            }
            else if (ioVariable == IoVariable.FragmentOutputColor)
            {
                varType = context.Config.GetFragmentOutputColorType(ioDefinition.Location);
            }
            else
            {
                (builtIn, varType) = IoMap.GetSpirvBuiltIn(ioVariable);
                isBuiltIn = true;

                if (varType == AggregateType.Invalid)
                {
                    throw new InvalidOperationException($"Unknown variable {ioVariable}.");
                }
            }

            bool hasComponent = context.Config.HasPerLocationInputOrOutputComponent(ioVariable, ioDefinition.Location, ioDefinition.Component, isOutput);

            if (hasComponent)
            {
                // Declared per component: keep only the element type.
                varType &= AggregateType.ElementTypeMask;
            }
            else if (ioVariable == IoVariable.UserDefined && context.Config.HasTransformFeedbackOutputs(isOutput))
            {
                // Resize the variable to the component count reported for transform feedback.
                varType &= AggregateType.ElementTypeMask;

                switch (context.Config.GetTransformFeedbackOutputComponents(ioDefinition.Location, ioDefinition.Component))
                {
                    case 2:
                        varType |= AggregateType.Vector2;
                        break;
                    case 3:
                        varType |= AggregateType.Vector3;
                        break;
                    case 4:
                        varType |= AggregateType.Vector4;
                        break;
                    default:
                        varType |= AggregateType.Invalid;
                        break;
                }
            }

            var spvType = context.GetType(varType, IoMap.GetSpirvBuiltInArrayLength(ioVariable));
            bool builtInPassthrough = false;

            if (!isPerPatch && IoMap.IsPerVertex(ioVariable, context.Config.Stage, isOutput))
            {
                // Per-vertex variables are arrays: sized by the input vertex count on geometry shaders, 32 otherwise.
                int arraySize = context.Config.Stage == ShaderStage.Geometry ? context.InputVertices : 32;
                spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), (LiteralInteger)arraySize));

                if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
                {
                    builtInPassthrough = true;
                }
            }

            if (context.Config.Stage == ShaderStage.TessellationControl && isOutput && !isPerPatch)
            {
                spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), context.Config.ThreadsPerInputPrimitive));
            }

            var spvPointerType = context.TypePointer(storageClass, spvType);
            var spvVar = context.Variable(spvPointerType, storageClass);

            if (builtInPassthrough)
            {
                context.Decorate(spvVar, Decoration.PassthroughNV);
            }

            if (isBuiltIn)
            {
                if (isPerPatch)
                {
                    context.Decorate(spvVar, Decoration.Patch);
                }

                if (context.Config.GpuAccessor.QueryHostReducedPrecision() && ioVariable == IoVariable.Position)
                {
                    context.Decorate(spvVar, Decoration.Invariant);
                }

                context.Decorate(spvVar, Decoration.BuiltIn, (LiteralInteger)builtIn);
            }
            else if (isPerPatch)
            {
                context.Decorate(spvVar, Decoration.Patch);

                if (ioVariable == IoVariable.UserDefined)
                {
                    int patchLocation = context.Config.GetPerPatchAttributeLocation(ioDefinition.Location);

                    context.Decorate(spvVar, Decoration.Location, (LiteralInteger)patchLocation);
                }
            }
            else if (ioVariable == IoVariable.UserDefined)
            {
                context.Decorate(spvVar, Decoration.Location, (LiteralInteger)ioDefinition.Location);

                if (hasComponent)
                {
                    context.Decorate(spvVar, Decoration.Component, (LiteralInteger)ioDefinition.Component);
                }

                if (!isOutput &&
                    !isPerPatch &&
                    (context.Config.PassthroughAttributes & (1 << ioDefinition.Location)) != 0 &&
                    context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
                {
                    context.Decorate(spvVar, Decoration.PassthroughNV);
                }
            }
            else if (ioVariable == IoVariable.FragmentOutputColor)
            {
                int location = ioDefinition.Location;

                if (context.Config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryDualSourceBlendEnable())
                {
                    // With dual source blend, the first two used outputs share the first location
                    // and are told apart by their index (0 or 1).
                    int firstLocation = BitOperations.TrailingZeroCount(context.Config.UsedOutputAttributes);
                    int index = location - firstLocation;
                    int mask = 3 << firstLocation;

                    if ((uint)index < 2 && (context.Config.UsedOutputAttributes & mask) == mask)
                    {
                        context.Decorate(spvVar, Decoration.Location, (LiteralInteger)firstLocation);
                        context.Decorate(spvVar, Decoration.Index, (LiteralInteger)index);
                    }
                    else
                    {
                        context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
                    }
                }
                else
                {
                    context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
                }
            }

            if (!isOutput)
            {
                if (iq == PixelImap.Constant)
                {
                    context.Decorate(spvVar, Decoration.Flat);
                }
                else if (iq == PixelImap.ScreenLinear)
                {
                    context.Decorate(spvVar, Decoration.NoPerspective);
                }
            }
            else if (context.Config.TryGetTransformFeedbackOutput(
                ioVariable,
                ioDefinition.Location,
                ioDefinition.Component,
                out var transformFeedbackOutput))
            {
                context.Decorate(spvVar, Decoration.XfbBuffer, (LiteralInteger)transformFeedbackOutput.Buffer);
                context.Decorate(spvVar, Decoration.XfbStride, (LiteralInteger)transformFeedbackOutput.Stride);
                context.Decorate(spvVar, Decoration.Offset, (LiteralInteger)transformFeedbackOutput.Offset);
            }

            context.AddGlobalVariable(spvVar);

            var dict = isPerPatch
                ? (isOutput ? context.OutputsPerPatch : context.InputsPerPatch)
                : (isOutput ?
context.Outputs : context.Inputs);
            dict.Add(ioDefinition, spvVar);
        }

        /// <summary>
        /// Returns the variable name prefix of a shader stage, looked up by the integer value of the stage.
        /// </summary>
        private static string GetStagePrefix(ShaderStage stage) => StagePrefixes[(int)stage];
    }
}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs
new file mode 100644
index 00000000..72541774
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs
@@ -0,0 +1,22 @@
using System;
using static Spv.Specification;

namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
    /// <summary>
    /// Conversions from shader translator enums to their SPIR-V counterparts.
    /// </summary>
    static class EnumConversion
    {
        /// <summary>
        /// Converts a shader stage to the SPIR-V execution model used for it.
        /// </summary>
        /// <param name="stage">Shader stage to convert</param>
        /// <returns>Matching execution model</returns>
        /// <exception cref="ArgumentException">The stage has no matching execution model</exception>
        public static ExecutionModel Convert(this ShaderStage stage)
        {
            switch (stage)
            {
                case ShaderStage.Compute:
                    return ExecutionModel.GLCompute;
                case ShaderStage.Vertex:
                    return ExecutionModel.Vertex;
                case ShaderStage.TessellationControl:
                    return ExecutionModel.TessellationControl;
                case ShaderStage.TessellationEvaluation:
                    return ExecutionModel.TessellationEvaluation;
                case ShaderStage.Geometry:
                    return ExecutionModel.Geometry;
                case ShaderStage.Fragment:
                    return ExecutionModel.Fragment;
                default:
                    throw new ArgumentException($"Invalid shader stage \"{stage}\".");
            }
        }
    }
}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
new file mode 100644
index 00000000..b6ffdb7a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -0,0 +1,2480 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Numerics;
using static Spv.Specification;

namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
    using SpvInstruction = Spv.Generator.Instruction;
    using SpvLiteralInteger = Spv.Generator.LiteralInteger;

    static class Instructions
    {
        private const MemorySemanticsMask DefaultMemorySemantics =
MemorySemanticsMask.ImageMemory |
            MemorySemanticsMask.AtomicCounterMemory |
            MemorySemanticsMask.WorkgroupMemory |
            MemorySemanticsMask.UniformMemory |
            MemorySemanticsMask.AcquireRelease;

        // Handler table indexed by (instruction & Instruction.Mask).
        // Slots without a registered handler stay null, which Generate reports as not implemented.
        private static readonly Func<CodeGenContext, AstOperation, OperationResult>[] InstTable =
            new Func<CodeGenContext, AstOperation, OperationResult>[(int)Instruction.Count];

        /// <summary>
        /// Registers the generator of every supported instruction.
        /// </summary>
        static Instructions()
        {
            Add(Instruction.Absolute,                 GenerateAbsolute);
            Add(Instruction.Add,                      GenerateAdd);
            Add(Instruction.AtomicAdd,                GenerateAtomicAdd);
            Add(Instruction.AtomicAnd,                GenerateAtomicAnd);
            Add(Instruction.AtomicCompareAndSwap,     GenerateAtomicCompareAndSwap);
            Add(Instruction.AtomicMinS32,             GenerateAtomicMinS32);
            Add(Instruction.AtomicMinU32,             GenerateAtomicMinU32);
            Add(Instruction.AtomicMaxS32,             GenerateAtomicMaxS32);
            Add(Instruction.AtomicMaxU32,             GenerateAtomicMaxU32);
            Add(Instruction.AtomicOr,                 GenerateAtomicOr);
            Add(Instruction.AtomicSwap,               GenerateAtomicSwap);
            Add(Instruction.AtomicXor,                GenerateAtomicXor);
            Add(Instruction.Ballot,                   GenerateBallot);
            Add(Instruction.Barrier,                  GenerateBarrier);
            Add(Instruction.BitCount,                 GenerateBitCount);
            Add(Instruction.BitfieldExtractS32,       GenerateBitfieldExtractS32);
            Add(Instruction.BitfieldExtractU32,       GenerateBitfieldExtractU32);
            Add(Instruction.BitfieldInsert,           GenerateBitfieldInsert);
            Add(Instruction.BitfieldReverse,          GenerateBitfieldReverse);
            Add(Instruction.BitwiseAnd,               GenerateBitwiseAnd);
            Add(Instruction.BitwiseExclusiveOr,       GenerateBitwiseExclusiveOr);
            Add(Instruction.BitwiseNot,               GenerateBitwiseNot);
            Add(Instruction.BitwiseOr,                GenerateBitwiseOr);
            Add(Instruction.Call,                     GenerateCall);
            Add(Instruction.Ceiling,                  GenerateCeiling);
            Add(Instruction.Clamp,                    GenerateClamp);
            Add(Instruction.ClampU32,                 GenerateClampU32);
            Add(Instruction.Comment,                  GenerateComment);
            Add(Instruction.CompareEqual,             GenerateCompareEqual);
            Add(Instruction.CompareGreater,           GenerateCompareGreater);
            Add(Instruction.CompareGreaterOrEqual,    GenerateCompareGreaterOrEqual);
            Add(Instruction.CompareGreaterOrEqualU32, GenerateCompareGreaterOrEqualU32);
            Add(Instruction.CompareGreaterU32,        GenerateCompareGreaterU32);
            Add(Instruction.CompareLess,              GenerateCompareLess);
            Add(Instruction.CompareLessOrEqual,       GenerateCompareLessOrEqual);
            Add(Instruction.CompareLessOrEqualU32,    GenerateCompareLessOrEqualU32);
            Add(Instruction.CompareLessU32,           GenerateCompareLessU32);
            Add(Instruction.CompareNotEqual,          GenerateCompareNotEqual);
            Add(Instruction.ConditionalSelect,        GenerateConditionalSelect);
            Add(Instruction.ConvertFP32ToFP64,        GenerateConvertFP32ToFP64);
            Add(Instruction.ConvertFP32ToS32,         GenerateConvertFP32ToS32);
            Add(Instruction.ConvertFP32ToU32,         GenerateConvertFP32ToU32);
            Add(Instruction.ConvertFP64ToFP32,        GenerateConvertFP64ToFP32);
            Add(Instruction.ConvertFP64ToS32,         GenerateConvertFP64ToS32);
            Add(Instruction.ConvertFP64ToU32,         GenerateConvertFP64ToU32);
            Add(Instruction.ConvertS32ToFP32,         GenerateConvertS32ToFP32);
            Add(Instruction.ConvertS32ToFP64,         GenerateConvertS32ToFP64);
            Add(Instruction.ConvertU32ToFP32,         GenerateConvertU32ToFP32);
            Add(Instruction.ConvertU32ToFP64,         GenerateConvertU32ToFP64);
            Add(Instruction.Cosine,                   GenerateCosine);
            Add(Instruction.Ddx,                      GenerateDdx);
            Add(Instruction.Ddy,                      GenerateDdy);
            Add(Instruction.Discard,                  GenerateDiscard);
            Add(Instruction.Divide,                   GenerateDivide);
            Add(Instruction.EmitVertex,               GenerateEmitVertex);
            Add(Instruction.EndPrimitive,             GenerateEndPrimitive);
            Add(Instruction.ExponentB2,               GenerateExponentB2);
            Add(Instruction.FSIBegin,                 GenerateFSIBegin);
            Add(Instruction.FSIEnd,                   GenerateFSIEnd);
            Add(Instruction.FindLSB,                  GenerateFindLSB);
            Add(Instruction.FindMSBS32,               GenerateFindMSBS32);
            Add(Instruction.FindMSBU32,               GenerateFindMSBU32);
            Add(Instruction.Floor,                    GenerateFloor);
            Add(Instruction.FusedMultiplyAdd,         GenerateFusedMultiplyAdd);
            Add(Instruction.GroupMemoryBarrier,       GenerateGroupMemoryBarrier);
            Add(Instruction.ImageAtomic,              GenerateImageAtomic);
            Add(Instruction.ImageLoad,                GenerateImageLoad);
            Add(Instruction.ImageStore,               GenerateImageStore);
            Add(Instruction.IsNan,                    GenerateIsNan);
            Add(Instruction.Load,                     GenerateLoad);
            Add(Instruction.LoadConstant,             GenerateLoadConstant);
            Add(Instruction.LoadLocal,                GenerateLoadLocal);
            Add(Instruction.LoadShared,               GenerateLoadShared);
            Add(Instruction.LoadStorage,              GenerateLoadStorage);
            Add(Instruction.Lod,                      GenerateLod);
            Add(Instruction.LogarithmB2,              GenerateLogarithmB2);
            Add(Instruction.LogicalAnd,               GenerateLogicalAnd);
            Add(Instruction.LogicalExclusiveOr,       GenerateLogicalExclusiveOr);
            Add(Instruction.LogicalNot,               GenerateLogicalNot);
            Add(Instruction.LogicalOr,                GenerateLogicalOr);
            Add(Instruction.LoopBreak,                GenerateLoopBreak);
            Add(Instruction.LoopContinue,             GenerateLoopContinue);
            Add(Instruction.Maximum,                  GenerateMaximum);
            Add(Instruction.MaximumU32,               GenerateMaximumU32);
            Add(Instruction.MemoryBarrier,            GenerateMemoryBarrier);
            Add(Instruction.Minimum,                  GenerateMinimum);
            Add(Instruction.MinimumU32,               GenerateMinimumU32);
            Add(Instruction.Multiply,                 GenerateMultiply);
            Add(Instruction.MultiplyHighS32,          GenerateMultiplyHighS32);
            Add(Instruction.MultiplyHighU32,          GenerateMultiplyHighU32);
            Add(Instruction.Negate,                   GenerateNegate);
            Add(Instruction.PackDouble2x32,           GeneratePackDouble2x32);
            Add(Instruction.PackHalf2x16,             GeneratePackHalf2x16);
            Add(Instruction.ReciprocalSquareRoot,     GenerateReciprocalSquareRoot);
            Add(Instruction.Return,                   GenerateReturn);
            Add(Instruction.Round,                    GenerateRound);
            Add(Instruction.ShiftLeft,                GenerateShiftLeft);
            Add(Instruction.ShiftRightS32,            GenerateShiftRightS32);
            Add(Instruction.ShiftRightU32,            GenerateShiftRightU32);
            Add(Instruction.Shuffle,                  GenerateShuffle);
            Add(Instruction.ShuffleDown,              GenerateShuffleDown);
            Add(Instruction.ShuffleUp,                GenerateShuffleUp);
            Add(Instruction.ShuffleXor,               GenerateShuffleXor);
            Add(Instruction.Sine,                     GenerateSine);
            Add(Instruction.SquareRoot,               GenerateSquareRoot);
            Add(Instruction.Store,                    GenerateStore);
            Add(Instruction.StoreLocal,               GenerateStoreLocal);
            Add(Instruction.StoreShared,              GenerateStoreShared);
            Add(Instruction.StoreShared16,            GenerateStoreShared16);
            Add(Instruction.StoreShared8,             GenerateStoreShared8);
            Add(Instruction.StoreStorage,             GenerateStoreStorage);
            Add(Instruction.StoreStorage16,           GenerateStoreStorage16);
            Add(Instruction.StoreStorage8,            GenerateStoreStorage8);
            Add(Instruction.Subtract,                 GenerateSubtract);
            Add(Instruction.SwizzleAdd,               GenerateSwizzleAdd);
            Add(Instruction.TextureSample,            GenerateTextureSample);
            Add(Instruction.TextureSize,              GenerateTextureSize);
            Add(Instruction.Truncate,                 GenerateTruncate);
            Add(Instruction.UnpackDouble2x32,         GenerateUnpackDouble2x32);
            Add(Instruction.UnpackHalf2x16,           GenerateUnpackHalf2x16);
            Add(Instruction.VectorExtract,            GenerateVectorExtract);
            Add(Instruction.VoteAll,                  GenerateVoteAll);
            Add(Instruction.VoteAllEqual,             GenerateVoteAllEqual);
            Add(Instruction.VoteAny,                  GenerateVoteAny);
        }

        /// <summary>
        /// Stores the generator of an instruction in the handler table, at the slot given by the masked instruction.
        /// </summary>
        private static void Add(Instruction inst, Func<CodeGenContext, AstOperation, OperationResult> handler)
        {
            int slot = (int)(inst & Instruction.Mask);

            InstTable[slot] = handler;
        }

        /// <summary>
        /// Generates the SPIR-V code of an operation by dispatching to the generator registered for its instruction.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="operation">Operation to generate</param>
        /// <returns>Result of the generator</returns>
        /// <exception cref="NotImplementedException">No generator is registered for the instruction</exception>
        public static OperationResult Generate(CodeGenContext context, AstOperation operation)
        {
            var generator = InstTable[(int)(operation.Inst & Instruction.Mask)];

            if (generator == null)
            {
                throw new NotImplementedException(operation.Inst.ToString());
            }

            return generator(context, operation);
        }

        // Each generator below forwards to a shared helper, passing the context delegate(s) for the operation.

        private static OperationResult GenerateAbsolute(CodeGenContext context, AstOperation operation)
            => GenerateUnary(context, operation, context.Delegates.GlslFAbs, context.Delegates.GlslSAbs);

        private static OperationResult GenerateAdd(CodeGenContext context, AstOperation operation)
            => GenerateBinary(context, operation, context.Delegates.FAdd, context.Delegates.IAdd);

        private static OperationResult GenerateAtomicAdd(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicIAdd);

        private static OperationResult GenerateAtomicAnd(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicAnd);

        private static OperationResult GenerateAtomicCompareAndSwap(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryCas(context, operation);

        private static OperationResult GenerateAtomicMinS32(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicSMin);

        private static OperationResult GenerateAtomicMinU32(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicUMin);

        private static OperationResult GenerateAtomicMaxS32(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicSMax);

        private static OperationResult GenerateAtomicMaxU32(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicUMax);

        private static OperationResult GenerateAtomicOr(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicOr);

        private static OperationResult GenerateAtomicSwap(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicExchange);

        private static OperationResult GenerateAtomicXor(CodeGenContext context, AstOperation operation)
            => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicXor);

        private static OperationResult GenerateBallot(CodeGenContext context,
AstOperation operation) + { + var source = operation.GetSource(0); + + var uvec4Type = context.TypeVector(context.TypeU32(), 4); + var execution = context.Constant(context.TypeU32(), Scope.Subgroup); + + var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source)); + var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)0); + + return new OperationResult(AggregateType.U32, mask); + } + + private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation) + { + context.ControlBarrier( + context.Constant(context.TypeU32(), Scope.Workgroup), + context.Constant(context.TypeU32(), Scope.Workgroup), + context.Constant(context.TypeU32(), MemorySemanticsMask.WorkgroupMemory | MemorySemanticsMask.AcquireRelease)); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateBitCount(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryS32(context, operation, context.Delegates.BitCount); + } + + private static OperationResult GenerateBitfieldExtractS32(CodeGenContext context, AstOperation operation) + { + return GenerateBitfieldExtractS32(context, operation, context.Delegates.BitFieldSExtract); + } + + private static OperationResult GenerateBitfieldExtractU32(CodeGenContext context, AstOperation operation) + { + return GenerateTernaryU32(context, operation, context.Delegates.BitFieldUExtract); + } + + private static OperationResult GenerateBitfieldInsert(CodeGenContext context, AstOperation operation) + { + return GenerateBitfieldInsert(context, operation, context.Delegates.BitFieldInsert); + } + + private static OperationResult GenerateBitfieldReverse(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryS32(context, operation, context.Delegates.BitReverse); + } + + private static OperationResult GenerateBitwiseAnd(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, 
operation, context.Delegates.BitwiseAnd); + } + + private static OperationResult GenerateBitwiseExclusiveOr(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, operation, context.Delegates.BitwiseXor); + } + + private static OperationResult GenerateBitwiseNot(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryS32(context, operation, context.Delegates.Not); + } + + private static OperationResult GenerateBitwiseOr(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, operation, context.Delegates.BitwiseOr); + } + + private static OperationResult GenerateCall(CodeGenContext context, AstOperation operation) + { + AstOperand funcId = (AstOperand)operation.GetSource(0); + + Debug.Assert(funcId.Type == OperandType.Constant); + + (var function, var spvFunc) = context.GetFunction(funcId.Value); + + var args = new SpvInstruction[operation.SourcesCount - 1]; + var spvLocals = context.GetLocalForArgsPointers(funcId.Value); + + for (int i = 0; i < args.Length; i++) + { + var operand = (AstOperand)operation.GetSource(i + 1); + if (i >= function.InArguments.Length) + { + args[i] = context.GetLocalPointer(operand); + } + else + { + var type = function.GetArgumentType(i); + var value = context.Get(type, operand); + var spvLocal = spvLocals[i]; + + context.Store(spvLocal, value); + + args[i] = spvLocal; + } + } + + var retType = function.ReturnType; + var result = context.FunctionCall(context.GetType(retType), spvFunc, args); + return new OperationResult(retType, result); + } + + private static OperationResult GenerateCeiling(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslCeil, null); + } + + private static OperationResult GenerateClamp(CodeGenContext context, AstOperation operation) + { + return GenerateTernary(context, operation, context.Delegates.GlslFClamp, context.Delegates.GlslSClamp); + } + + private static 
OperationResult GenerateClampU32(CodeGenContext context, AstOperation operation) + { + return GenerateTernaryU32(context, operation, context.Delegates.GlslUClamp); + } + + private static OperationResult GenerateComment(CodeGenContext context, AstOperation operation) + { + return OperationResult.Invalid; + } + + private static OperationResult GenerateCompareEqual(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdEqual, context.Delegates.IEqual); + } + + private static OperationResult GenerateCompareGreater(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdGreaterThan, context.Delegates.SGreaterThan); + } + + private static OperationResult GenerateCompareGreaterOrEqual(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdGreaterThanEqual, context.Delegates.SGreaterThanEqual); + } + + private static OperationResult GenerateCompareGreaterOrEqualU32(CodeGenContext context, AstOperation operation) + { + return GenerateCompareU32(context, operation, context.Delegates.UGreaterThanEqual); + } + + private static OperationResult GenerateCompareGreaterU32(CodeGenContext context, AstOperation operation) + { + return GenerateCompareU32(context, operation, context.Delegates.UGreaterThan); + } + + private static OperationResult GenerateCompareLess(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdLessThan, context.Delegates.SLessThan); + } + + private static OperationResult GenerateCompareLessOrEqual(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdLessThanEqual, context.Delegates.SLessThanEqual); + } + + private static OperationResult GenerateCompareLessOrEqualU32(CodeGenContext context, AstOperation operation) + { + return 
GenerateCompareU32(context, operation, context.Delegates.ULessThanEqual); + } + + private static OperationResult GenerateCompareLessU32(CodeGenContext context, AstOperation operation) + { + return GenerateCompareU32(context, operation, context.Delegates.ULessThan); + } + + private static OperationResult GenerateCompareNotEqual(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdNotEqual, context.Delegates.INotEqual); + } + + private static OperationResult GenerateConditionalSelect(CodeGenContext context, AstOperation operation) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + var cond = context.Get(AggregateType.Bool, src1); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + return new OperationResult(AggregateType.FP64, context.Select(context.TypeFP64(), cond, context.GetFP64(src2), context.GetFP64(src3))); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + return new OperationResult(AggregateType.FP32, context.Select(context.TypeFP32(), cond, context.GetFP32(src2), context.GetFP32(src3))); + } + else + { + return new OperationResult(AggregateType.S32, context.Select(context.TypeS32(), cond, context.GetS32(src2), context.GetS32(src3))); + } + } + + private static OperationResult GenerateConvertFP32ToFP64(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP64, context.FConvert(context.TypeFP64(), context.GetFP32(source))); + } + + private static OperationResult GenerateConvertFP32ToS32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.S32, context.ConvertFToS(context.TypeS32(), context.GetFP32(source))); + } + + private static OperationResult GenerateConvertFP32ToU32(CodeGenContext context, AstOperation operation) + { + var source = 
operation.GetSource(0); + + return new OperationResult(AggregateType.U32, context.ConvertFToU(context.TypeU32(), context.GetFP32(source))); + } + + private static OperationResult GenerateConvertFP64ToFP32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP32, context.FConvert(context.TypeFP32(), context.GetFP64(source))); + } + + private static OperationResult GenerateConvertFP64ToS32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.S32, context.ConvertFToS(context.TypeS32(), context.GetFP64(source))); + } + + private static OperationResult GenerateConvertFP64ToU32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.U32, context.ConvertFToU(context.TypeU32(), context.GetFP64(source))); + } + + private static OperationResult GenerateConvertS32ToFP32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP32, context.ConvertSToF(context.TypeFP32(), context.GetS32(source))); + } + + private static OperationResult GenerateConvertS32ToFP64(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP64, context.ConvertSToF(context.TypeFP64(), context.GetS32(source))); + } + + private static OperationResult GenerateConvertU32ToFP32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP32, context.ConvertUToF(context.TypeFP32(), context.GetU32(source))); + } + + private static OperationResult GenerateConvertU32ToFP64(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP64, 
context.ConvertUToF(context.TypeFP64(), context.GetU32(source))); + } + + private static OperationResult GenerateCosine(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslCos, null); + } + + private static OperationResult GenerateDdx(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryFP32(context, operation, context.Delegates.DPdx); + } + + private static OperationResult GenerateDdy(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryFP32(context, operation, context.Delegates.DPdy); + } + + private static OperationResult GenerateDiscard(CodeGenContext context, AstOperation operation) + { + context.Kill(); + return OperationResult.Invalid; + } + + private static OperationResult GenerateDivide(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.FDiv, context.Delegates.SDiv); + } + + private static OperationResult GenerateEmitVertex(CodeGenContext context, AstOperation operation) + { + context.EmitVertex(); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateEndPrimitive(CodeGenContext context, AstOperation operation) + { + context.EndPrimitive(); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateExponentB2(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslExp2, null); + } + + private static OperationResult GenerateFSIBegin(CodeGenContext context, AstOperation operation) + { + if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.BeginInvocationInterlockEXT(); + } + + return OperationResult.Invalid; + } + + private static OperationResult GenerateFSIEnd(CodeGenContext context, AstOperation operation) + { + if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.EndInvocationInterlockEXT(); + } + + return 
OperationResult.Invalid; + } + + private static OperationResult GenerateFindLSB(CodeGenContext context, AstOperation operation) + { + var source = context.GetU32(operation.GetSource(0)); + return new OperationResult(AggregateType.U32, context.GlslFindILsb(context.TypeU32(), source)); + } + + private static OperationResult GenerateFindMSBS32(CodeGenContext context, AstOperation operation) + { + var source = context.GetS32(operation.GetSource(0)); + return new OperationResult(AggregateType.U32, context.GlslFindSMsb(context.TypeU32(), source)); + } + + private static OperationResult GenerateFindMSBU32(CodeGenContext context, AstOperation operation) + { + var source = context.GetU32(operation.GetSource(0)); + return new OperationResult(AggregateType.U32, context.GlslFindUMsb(context.TypeU32(), source)); + } + + private static OperationResult GenerateFloor(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslFloor, null); + } + + private static OperationResult GenerateFusedMultiplyAdd(CodeGenContext context, AstOperation operation) + { + return GenerateTernary(context, operation, context.Delegates.GlslFma, null); + } + + private static OperationResult GenerateGroupMemoryBarrier(CodeGenContext context, AstOperation operation) + { + context.MemoryBarrier(context.Constant(context.TypeU32(), Scope.Workgroup), context.Constant(context.TypeU32(), DefaultMemorySemantics)); + return OperationResult.Invalid; + } + + private static OperationResult GenerateImageAtomic(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + var componentType = texOp.Format.GetComponentType(); + + // TODO: Bindless texture support. For now we just return 0/do nothing. 
+ if (isBindless) + { + return new OperationResult(componentType, componentType switch + { + AggregateType.S32 => context.Constant(context.TypeS32(), 0), + AggregateType.U32 => context.Constant(context.TypeU32(), 0u), + _ => context.Constant(context.TypeFP32(), 0f), + }); + } + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + int srcIndex = isBindless ? 1 : 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount + (isArray ? 1 : 0); + + SpvInstruction pCoords; + + if (pCount > 1) + { + SpvInstruction[] elems = new SpvInstruction[pCount]; + + for (int i = 0; i < pCount; i++) + { + elems[i] = Src(AggregateType.S32); + } + + var vectorType = context.TypeVector(context.TypeS32(), pCount); + pCoords = context.CompositeConstruct(vectorType, elems); + } + else + { + pCoords = Src(AggregateType.S32); + } + + SpvInstruction value = Src(componentType); + + (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)]; + + var image = context.Load(imageType, imageVariable); + + SpvInstruction resultType = context.GetType(componentType); + SpvInstruction imagePointerType = context.TypePointer(StorageClass.Image, resultType); + + var pointer = context.ImageTexelPointer(imagePointerType, imageVariable, pCoords, context.Constant(context.TypeU32(), 0)); + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + var result = (texOp.Flags & TextureFlags.AtomicMask) switch + { + TextureFlags.Add => context.AtomicIAdd(resultType, pointer, one, zero, value), + TextureFlags.Minimum => componentType == AggregateType.S32 + ? 
context.AtomicSMin(resultType, pointer, one, zero, value) + : context.AtomicUMin(resultType, pointer, one, zero, value), + TextureFlags.Maximum => componentType == AggregateType.S32 + ? context.AtomicSMax(resultType, pointer, one, zero, value) + : context.AtomicUMax(resultType, pointer, one, zero, value), + TextureFlags.Increment => context.AtomicIIncrement(resultType, pointer, one, zero), + TextureFlags.Decrement => context.AtomicIDecrement(resultType, pointer, one, zero), + TextureFlags.BitwiseAnd => context.AtomicAnd(resultType, pointer, one, zero, value), + TextureFlags.BitwiseOr => context.AtomicOr(resultType, pointer, one, zero, value), + TextureFlags.BitwiseXor => context.AtomicXor(resultType, pointer, one, zero, value), + TextureFlags.Swap => context.AtomicExchange(resultType, pointer, one, zero, value), + TextureFlags.CAS => context.AtomicCompareExchange(resultType, pointer, one, zero, zero, Src(componentType), value), + _ => context.AtomicIAdd(resultType, pointer, one, zero, value), + }; + + return new OperationResult(componentType, result); + } + + private static OperationResult GenerateImageLoad(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + var componentType = texOp.Format.GetComponentType(); + + // TODO: Bindless texture support. For now we just return 0/do nothing. + if (isBindless) + { + return GetZeroOperationResult(context, texOp, componentType, isVector: true); + } + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + int srcIndex = isBindless ? 1 : 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount + (isArray ? 
1 : 0); + + SpvInstruction pCoords; + + if (pCount > 1) + { + SpvInstruction[] elems = new SpvInstruction[pCount]; + + for (int i = 0; i < pCount; i++) + { + elems[i] = Src(AggregateType.S32); + } + + var vectorType = context.TypeVector(context.TypeS32(), pCount); + pCoords = context.CompositeConstruct(vectorType, elems); + } + else + { + pCoords = Src(AggregateType.S32); + } + + pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords: true, isBindless, isIndexed, isArray, pCount); + + (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)]; + + var image = context.Load(imageType, imageVariable); + var imageComponentType = context.GetType(componentType); + var swizzledResultType = texOp.GetVectorType(componentType); + + var texel = context.ImageRead(context.TypeVector(imageComponentType, 4), image, pCoords, ImageOperandsMask.MaskNone); + var result = GetSwizzledResult(context, texel, swizzledResultType, texOp.Index); + + return new OperationResult(componentType, result); + } + + private static OperationResult GenerateImageStore(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + // TODO: Bindless texture support. For now we just return 0/do nothing. + if (isBindless) + { + return OperationResult.Invalid; + } + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + int srcIndex = isBindless ? 1 : 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount + (isArray ? 
1 : 0); + + SpvInstruction pCoords; + + if (pCount > 1) + { + SpvInstruction[] elems = new SpvInstruction[pCount]; + + for (int i = 0; i < pCount; i++) + { + elems[i] = Src(AggregateType.S32); + } + + var vectorType = context.TypeVector(context.TypeS32(), pCount); + pCoords = context.CompositeConstruct(vectorType, elems); + } + else + { + pCoords = Src(AggregateType.S32); + } + + var componentType = texOp.Format.GetComponentType(); + + const int ComponentsCount = 4; + + SpvInstruction[] cElems = new SpvInstruction[ComponentsCount]; + + for (int i = 0; i < ComponentsCount; i++) + { + if (srcIndex < texOp.SourcesCount) + { + cElems[i] = Src(componentType); + } + else + { + cElems[i] = componentType switch + { + AggregateType.S32 => context.Constant(context.TypeS32(), 0), + AggregateType.U32 => context.Constant(context.TypeU32(), 0u), + _ => context.Constant(context.TypeFP32(), 0f), + }; + } + } + + var texel = context.CompositeConstruct(context.TypeVector(context.GetType(componentType), ComponentsCount), cElems); + + (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)]; + + var image = context.Load(imageType, imageVariable); + + context.ImageWrite(image, pCoords, texel, ImageOperandsMask.MaskNone); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateIsNan(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + SpvInstruction result; + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + result = context.IsNan(context.TypeBool(), context.GetFP64(source)); + } + else + { + result = context.IsNan(context.TypeBool(), context.GetFP32(source)); + } + + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateLoad(CodeGenContext context, AstOperation operation) + { + return GenerateLoadOrStore(context, operation, isStore: false); + } + + private static OperationResult 
GenerateLoadConstant(CodeGenContext context, AstOperation operation) + { + var src1 = operation.GetSource(0); + var src2 = context.Get(AggregateType.S32, operation.GetSource(1)); + + var i1 = context.Constant(context.TypeS32(), 0); + var i2 = context.ShiftRightArithmetic(context.TypeS32(), src2, context.Constant(context.TypeS32(), 2)); + var i3 = context.BitwiseAnd(context.TypeS32(), src2, context.Constant(context.TypeS32(), 3)); + + SpvInstruction value = null; + + if (context.Config.GpuAccessor.QueryHostHasVectorIndexingBug()) + { + // Test for each component individually. + for (int i = 0; i < 4; i++) + { + var component = context.Constant(context.TypeS32(), i); + + SpvInstruction elemPointer; + if (context.UniformBuffersArray != null) + { + var ubVariable = context.UniformBuffersArray; + var i0 = context.Get(AggregateType.S32, src1); + + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), ubVariable, i0, i1, i2, component); + } + else + { + var ubVariable = context.UniformBuffers[((AstOperand)src1).Value]; + + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), ubVariable, i1, i2, component); + } + + SpvInstruction newValue = context.Load(context.TypeFP32(), elemPointer); + + value = value != null ? 
context.Select(context.TypeFP32(), context.IEqual(context.TypeBool(), i3, component), newValue, value) : newValue; + } + } + else + { + SpvInstruction elemPointer; + + if (context.UniformBuffersArray != null) + { + var ubVariable = context.UniformBuffersArray; + var i0 = context.Get(AggregateType.S32, src1); + + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), ubVariable, i0, i1, i2, i3); + } + else + { + var ubVariable = context.UniformBuffers[((AstOperand)src1).Value]; + + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), ubVariable, i1, i2, i3); + } + + value = context.Load(context.TypeFP32(), elemPointer); + } + + return new OperationResult(AggregateType.FP32, value); + } + + private static OperationResult GenerateLoadLocal(CodeGenContext context, AstOperation operation) + { + return GenerateLoadLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory); + } + + private static OperationResult GenerateLoadShared(CodeGenContext context, AstOperation operation) + { + return GenerateLoadLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory); + } + + private static OperationResult GenerateLoadLocalOrShared( + CodeGenContext context, + AstOperation operation, + StorageClass storageClass, + SpvInstruction memory) + { + var offset = context.Get(AggregateType.S32, operation.GetSource(0)); + + var elemPointer = context.AccessChain(context.TypePointer(storageClass, context.TypeU32()), memory, offset); + var value = context.Load(context.TypeU32(), elemPointer); + + return new OperationResult(AggregateType.U32, value); + } + + private static OperationResult GenerateLoadStorage(CodeGenContext context, AstOperation operation) + { + var elemPointer = GetStorageElemPointer(context, operation); + var value = context.Load(context.TypeU32(), elemPointer); + + return new OperationResult(AggregateType.U32, value); + } + + private static 
OperationResult GenerateLod(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + // TODO: Bindless texture support. For now we just return 0. + if (isBindless) + { + return new OperationResult(AggregateType.S32, context.Constant(context.TypeS32(), 0)); + } + + int srcIndex = 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int pCount = texOp.Type.GetDimensions(); + + SpvInstruction pCoords; + + if (pCount > 1) + { + SpvInstruction[] elems = new SpvInstruction[pCount]; + + for (int i = 0; i < pCount; i++) + { + elems[i] = Src(AggregateType.FP32); + } + + var vectorType = context.TypeVector(context.TypeFP32(), pCount); + pCoords = context.CompositeConstruct(vectorType, elems); + } + else + { + pCoords = Src(AggregateType.FP32); + } + + var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format); + + (_, var sampledImageType, var sampledImageVariable) = context.Samplers[meta]; + + var image = context.Load(sampledImageType, sampledImageVariable); + + var resultType = context.TypeVector(context.TypeFP32(), 2); + var packed = context.ImageQueryLod(resultType, image, pCoords); + var result = context.CompositeExtract(context.TypeFP32(), packed, (SpvLiteralInteger)texOp.Index); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateLogarithmB2(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslLog2, null); + } + + private static OperationResult GenerateLogicalAnd(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryBool(context, operation, context.Delegates.LogicalAnd); + } + + 
private static OperationResult GenerateLogicalExclusiveOr(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryBool(context, operation, context.Delegates.LogicalNotEqual); + } + + private static OperationResult GenerateLogicalNot(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryBool(context, operation, context.Delegates.LogicalNot); + } + + private static OperationResult GenerateLogicalOr(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryBool(context, operation, context.Delegates.LogicalOr); + } + + private static OperationResult GenerateLoopBreak(CodeGenContext context, AstOperation operation) + { + AstBlock loopBlock = context.CurrentBlock; + while (loopBlock.Type != AstBlockType.DoWhile) + { + loopBlock = loopBlock.Parent; + } + + context.Branch(context.GetNextLabel(loopBlock.Parent)); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateLoopContinue(CodeGenContext context, AstOperation operation) + { + AstBlock loopBlock = context.CurrentBlock; + while (loopBlock.Type != AstBlockType.DoWhile) + { + loopBlock = loopBlock.Parent; + } + + (var loopTarget, var continueTarget) = context.LoopTargets[loopBlock]; + + context.Branch(continueTarget); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateMaximum(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.GlslFMax, context.Delegates.GlslSMax); + } + + private static OperationResult GenerateMaximumU32(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryU32(context, operation, context.Delegates.GlslUMax); + } + + private static OperationResult GenerateMemoryBarrier(CodeGenContext context, AstOperation operation) + { + context.MemoryBarrier(context.Constant(context.TypeU32(), Scope.Device), context.Constant(context.TypeU32(), DefaultMemorySemantics)); + return OperationResult.Invalid; + } + + private 
static OperationResult GenerateMinimum(CodeGenContext context, AstOperation operation)
{
    // Forwards to GenerateBinary with the GlslFMin and GlslSMin emitters.
    return GenerateBinary(context, operation, context.Delegates.GlslFMin, context.Delegates.GlslSMin);
}

/// <summary>
/// Forwards to GenerateBinaryU32 with the GlslUMin emitter.
/// </summary>
private static OperationResult GenerateMinimumU32(CodeGenContext context, AstOperation operation)
{
    return GenerateBinaryU32(context, operation, context.Delegates.GlslUMin);
}

/// <summary>
/// Forwards to GenerateBinary with the FMul and IMul emitters.
/// </summary>
private static OperationResult GenerateMultiply(CodeGenContext context, AstOperation operation)
{
    return GenerateBinary(context, operation, context.Delegates.FMul, context.Delegates.IMul);
}

/// <summary>
/// Emits a signed extended multiply of sources 0 and 1 and returns member 1 of
/// the two-member result struct as S32.
/// </summary>
private static OperationResult GenerateMultiplyHighS32(CodeGenContext context, AstOperation operation)
{
    var src1 = operation.GetSource(0);
    var src2 = operation.GetSource(1);

    var resultType = context.TypeStruct(false, context.TypeS32(), context.TypeS32());
    var result = context.SMulExtended(resultType, context.GetS32(src1), context.GetS32(src2));
    result = context.CompositeExtract(context.TypeS32(), result, 1);

    return new OperationResult(AggregateType.S32, result);
}

/// <summary>
/// Emits an unsigned extended multiply of sources 0 and 1 and returns member 1 of
/// the two-member result struct as U32.
/// </summary>
private static OperationResult GenerateMultiplyHighU32(CodeGenContext context, AstOperation operation)
{
    var src1 = operation.GetSource(0);
    var src2 = operation.GetSource(1);

    var resultType = context.TypeStruct(false, context.TypeU32(), context.TypeU32());
    var result = context.UMulExtended(resultType, context.GetU32(src1), context.GetU32(src2));
    result = context.CompositeExtract(context.TypeU32(), result, 1);

    return new OperationResult(AggregateType.U32, result);
}

/// <summary>
/// Forwards to GenerateUnary with the FNegate and SNegate emitters.
/// </summary>
private static OperationResult GenerateNegate(CodeGenContext context, AstOperation operation)
{
    return GenerateUnary(context, operation, context.Delegates.FNegate, context.Delegates.SNegate);
}

/// <summary>
/// Builds a 2-component U32 vector from sources 0 and 1 and packs it into an FP64 value.
/// </summary>
private static OperationResult GeneratePackDouble2x32(CodeGenContext context, AstOperation operation)
{
    var value0 = context.GetU32(operation.GetSource(0));
    var value1 = context.GetU32(operation.GetSource(1));
    var vector = context.CompositeConstruct(context.TypeVector(context.TypeU32(), 2), value0, value1);
    var result = context.GlslPackDouble2x32(context.TypeFP64(), vector);

    return new OperationResult(AggregateType.FP64, result);
}

/// <summary>
/// Builds a 2-component FP32 vector from sources 0 and 1 and packs it into a U32 value.
/// </summary>
private static OperationResult GeneratePackHalf2x16(CodeGenContext context, AstOperation operation)
{
    var value0 = context.GetFP32(operation.GetSource(0));
    var value1 = context.GetFP32(operation.GetSource(1));
    var vector = context.CompositeConstruct(context.TypeVector(context.TypeFP32(), 2), value0, value1);
    var result = context.GlslPackHalf2x16(context.TypeU32(), vector);

    return new OperationResult(AggregateType.U32, result);
}

/// <summary>
/// Forwards to GenerateUnary with GlslInverseSqrt as the first emitter and no second emitter.
/// </summary>
private static OperationResult GenerateReciprocalSquareRoot(CodeGenContext context, AstOperation operation)
{
    return GenerateUnary(context, operation, context.Delegates.GlslInverseSqrt, null);
}

/// <summary>
/// Emits a return instruction.
/// </summary>
/// <returns>OperationResult.Invalid, as no value is produced.</returns>
private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
{
    context.Return();
    return OperationResult.Invalid;
}

/// <summary>
/// Forwards to GenerateUnary with GlslRoundEven as the first emitter and no second emitter.
/// </summary>
private static OperationResult GenerateRound(CodeGenContext context, AstOperation operation)
{
    return GenerateUnary(context, operation, context.Delegates.GlslRoundEven, null);
}

/// <summary>
/// Forwards to GenerateBinaryS32 with the ShiftLeftLogical emitter.
/// </summary>
private static OperationResult GenerateShiftLeft(CodeGenContext context, AstOperation operation)
{
    return GenerateBinaryS32(context, operation, context.Delegates.ShiftLeftLogical);
}

/// <summary>
/// Forwards to GenerateBinaryS32 with the ShiftRightArithmetic emitter.
/// </summary>
private static OperationResult GenerateShiftRightS32(CodeGenContext context, AstOperation operation)
{
    return GenerateBinaryS32(context, operation, context.Delegates.ShiftRightArithmetic);
}

/// <summary>
/// Forwards to GenerateBinaryS32 with the ShiftRightLogical emitter.
/// </summary>
private static OperationResult GenerateShiftRightU32(CodeGenContext context, AstOperation operation)
{
    // NOTE(review): this is the U32 variant but it goes through GenerateBinaryS32,
    // while the other U32 operations here use GenerateBinaryU32 - confirm this is intended.
    return GenerateBinaryS32(context, operation, context.Delegates.ShiftRightLogical);
}

/// <summary>
/// Emits a subgroup shuffle of source 0 from the lane
/// (index &amp; ~segMask) | (laneId &amp; segMask), where segMask = (mask >> 8) &amp; 31.
/// The shuffled value is selected only when the source lane is not above
/// (laneId &amp; segMask) | ((mask &amp; 31) &amp; ~segMask); otherwise source 0 is returned unchanged.
/// The validity flag is stored into the local variable given as source 3.
/// </summary>
private static OperationResult GenerateShuffle(CodeGenContext context, AstOperation operation)
{
    var x = context.GetFP32(operation.GetSource(0));
    var index = context.GetU32(operation.GetSource(1));
    var mask = context.GetU32(operation.GetSource(2));

    var const31 = context.Constant(context.TypeU32(), 31);
    var const8 = context.Constant(context.TypeU32(), 8);

    var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
    var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
    var notSegMask = context.Not(context.TypeU32(), segMask);
    var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);
    var indexNotSegMask = context.BitwiseAnd(context.TypeU32(), index, notSegMask);

    var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);

    var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
    var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
    var srcThreadId = context.BitwiseOr(context.TypeU32(), indexNotSegMask, minThreadId);
    var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
    var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
    var result = context.Select(context.TypeFP32(), valid, value, x);

    var validLocal = (AstOperand)operation.GetSource(3);

    context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));

    return new OperationResult(AggregateType.FP32, result);
}

/// <summary>
/// Emits a subgroup shuffle of source 0 from lane (laneId + index).
/// The shuffled value is selected only when that lane is not above
/// (laneId &amp; segMask) | ((mask &amp; 31) &amp; ~segMask), with segMask = (mask >> 8) &amp; 31;
/// otherwise source 0 is returned unchanged.
/// The validity flag is stored into the local variable given as source 3.
/// </summary>
private static OperationResult GenerateShuffleDown(CodeGenContext context, AstOperation operation)
{
    var x = context.GetFP32(operation.GetSource(0));
    var index = context.GetU32(operation.GetSource(1));
    var mask = context.GetU32(operation.GetSource(2));

    var const31 = context.Constant(context.TypeU32(), 31);
    var const8 = context.Constant(context.TypeU32(), 8);

    var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
    var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
    var notSegMask = context.Not(context.TypeU32(), segMask);
    var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);

    var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);

    var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
    var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
    var srcThreadId = context.IAdd(context.TypeU32(), threadId, index);
    var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
    var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
    var result = context.Select(context.TypeFP32(), valid, value, x);

    var validLocal = (AstOperand)operation.GetSource(3);

    context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));

    return new OperationResult(AggregateType.FP32, result);
}

/// <summary>
/// Emits a subgroup shuffle of source 0 from lane (laneId - index).
/// The shuffled value is selected only when that lane is not below
/// (laneId &amp; segMask), with segMask = (mask >> 8) &amp; 31; otherwise source 0 is returned unchanged.
/// The validity flag is stored into the local variable given as source 3.
/// </summary>
private static OperationResult GenerateShuffleUp(CodeGenContext context, AstOperation operation)
{
    var x = context.GetFP32(operation.GetSource(0));
    var index = context.GetU32(operation.GetSource(1));
    var mask = context.GetU32(operation.GetSource(2));

    var const31 = context.Constant(context.TypeU32(), 31);
    var const8 = context.Constant(context.TypeU32(), 8);

    var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);

    var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);

    var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
    var srcThreadId = context.ISub(context.TypeU32(), threadId, index);

    // NOTE(review): signed comparison on values fetched as U32 - presumably so that a
    // subtraction that wrapped below zero fails the check; confirm.
    var valid = context.SGreaterThanEqual(context.TypeBool(), srcThreadId, minThreadId);
    var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
    var result = context.Select(context.TypeFP32(), valid, value, x);

    var validLocal = (AstOperand)operation.GetSource(3);

    context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));

    return new OperationResult(AggregateType.FP32, result);
}

/// <summary>
/// Emits a subgroup shuffle of source 0 from lane (laneId ^ index).
/// The shuffled value is selected only when that lane is not above
/// (laneId &amp; segMask) | ((mask &amp; 31) &amp; ~segMask), with segMask = (mask >> 8) &amp; 31;
/// otherwise source 0 is returned unchanged.
/// The validity flag is stored into the local variable given as source 3.
/// </summary>
private static OperationResult GenerateShuffleXor(CodeGenContext context, AstOperation operation)
{
    var x = context.GetFP32(operation.GetSource(0));
    var index = context.GetU32(operation.GetSource(1));
    var mask = context.GetU32(operation.GetSource(2));

    var const31 = context.Constant(context.TypeU32(), 31);
    var const8 = context.Constant(context.TypeU32(), 8);

    var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31);
    var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31);
    var notSegMask = context.Not(context.TypeU32(), segMask);
    var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask);

    var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);

    var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask);
    var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask);
    var srcThreadId = context.BitwiseXor(context.TypeU32(), threadId, index);
    var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId);
    var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId);
    var result = context.Select(context.TypeFP32(), valid, value, x);

    var validLocal = (AstOperand)operation.GetSource(3);

    context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid));

    return new OperationResult(AggregateType.FP32, result);
}

/// <summary>
/// Forwards to GenerateUnary with GlslSin as the first emitter and no second emitter.
/// </summary>
private static OperationResult GenerateSine(CodeGenContext context, AstOperation operation)
{
    return GenerateUnary(context, operation, context.Delegates.GlslSin, null);
}

/// <summary>
/// Forwards to GenerateUnary with GlslSqrt as the first emitter and no second emitter.
/// </summary>
private static OperationResult GenerateSquareRoot(CodeGenContext context, AstOperation operation)
{
    return GenerateUnary(context, operation, context.Delegates.GlslSqrt, null);
}

/// <summary>
/// Forwards to GenerateLoadOrStore in store mode.
/// </summary>
private static OperationResult GenerateStore(CodeGenContext context, AstOperation operation)
{
    return GenerateLoadOrStore(context, operation, isStore: true);
}

/// <summary>
/// Stores to context.LocalMemory using the Private storage class.
/// </summary>
private static OperationResult GenerateStoreLocal(CodeGenContext context, AstOperation operation)
{
    return GenerateStoreLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory);
}

/// <summary>
/// Stores to context.SharedMemory using the Workgroup storage class.
/// </summary>
private static OperationResult GenerateStoreShared(CodeGenContext context, AstOperation operation)
{
    return GenerateStoreLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory);
}

/// <summary>
/// Stores source 1 (as U32) into the element of <paramref name="memory"/> addressed by
/// source 0 (as S32), through a U32 pointer of the given storage class.
/// </summary>
/// <returns>OperationResult.Invalid, as no value is produced.</returns>
private static OperationResult GenerateStoreLocalOrShared(
    CodeGenContext context,
    AstOperation operation,
    StorageClass storageClass,
    SpvInstruction memory)
{
    var offset = context.Get(AggregateType.S32, operation.GetSource(0));
    var value = context.Get(AggregateType.U32, operation.GetSource(1));

    var elemPointer = context.AccessChain(context.TypePointer(storageClass, context.TypeU32()), memory, offset);
    context.Store(elemPointer, value);

    return OperationResult.Invalid;
}

/// <summary>
/// Forwards to GenerateStoreSharedSmallInt with a bit size of 16.
/// </summary>
private static OperationResult GenerateStoreShared16(CodeGenContext context, AstOperation operation)
{
    GenerateStoreSharedSmallInt(context, operation, 16);

    return OperationResult.Invalid;
}

/// <summary>
/// Forwards to GenerateStoreSharedSmallInt with a bit size of 8.
/// </summary>
private static OperationResult GenerateStoreShared8(CodeGenContext context, AstOperation operation)
{
    GenerateStoreSharedSmallInt(context, operation, 8);

    return OperationResult.Invalid;
}

/// <summary>
/// Stores source 2 (as U32) through the pointer returned by GetStorageElemPointer.
/// </summary>
/// <returns>OperationResult.Invalid, as no value is produced.</returns>
private static OperationResult GenerateStoreStorage(CodeGenContext context, AstOperation operation)
{
    var elemPointer = GetStorageElemPointer(context, operation);
    context.Store(elemPointer, context.Get(AggregateType.U32, operation.GetSource(2)));

    return OperationResult.Invalid;
}

/// <summary>
/// Forwards to GenerateStoreStorageSmallInt with a bit size of 16.
/// </summary>
private static OperationResult GenerateStoreStorage16(CodeGenContext context, AstOperation operation)
{
    GenerateStoreStorageSmallInt(context, operation, 16);

    return OperationResult.Invalid;
}

/// <summary>
/// Forwards to GenerateStoreStorageSmallInt with a bit size of 8.
/// </summary>
private static OperationResult GenerateStoreStorage8(CodeGenContext context, AstOperation operation)
{
    GenerateStoreStorageSmallInt(context, operation, 8);

    return OperationResult.Invalid;
}

/// <summary>
/// Forwards to GenerateBinary with the FSub and ISub emitters.
/// </summary>
private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation)
{
    return GenerateBinary(context, operation, context.Delegates.FSub, context.Delegates.ISub);
}

/// <summary>
/// Computes x * xLut[sel] + y * yLut[sel], where sel is the 2-bit field of source 2
/// (the mask) at bit offset (laneId &amp; 3) * 2, and the lookup tables are
/// xLut = (1, -1, 1, 0) and yLut = (1, 1, -1, 1).
/// </summary>
private static OperationResult GenerateSwizzleAdd(CodeGenContext context, AstOperation operation)
{
    var x = context.Get(AggregateType.FP32, operation.GetSource(0));
    var y = context.Get(AggregateType.FP32, operation.GetSource(1));
    var mask = context.Get(AggregateType.U32, operation.GetSource(2));

    var v4float = context.TypeVector(context.TypeFP32(), 4);
    var one = context.Constant(context.TypeFP32(), 1.0f);
    var minusOne = context.Constant(context.TypeFP32(), -1.0f);
    var zero = context.Constant(context.TypeFP32(), 0.0f);
    var xLut = context.ConstantComposite(v4float, one, minusOne, one, zero);
    var yLut = context.ConstantComposite(v4float, one, one, minusOne, one);

    var three = context.Constant(context.TypeU32(), 3);

    var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId);

    // shift = (laneId & 3) * 2: each of the four lane positions owns two bits of the mask.
    var shift = context.BitwiseAnd(context.TypeU32(), threadId, three);
    shift = context.ShiftLeftLogical(context.TypeU32(), shift, context.Constant(context.TypeU32(), 1));
    var lutIdx = context.ShiftRightLogical(context.TypeU32(), mask, shift);
    lutIdx = context.BitwiseAnd(context.TypeU32(), lutIdx, three);

    var xLutValue = context.VectorExtractDynamic(context.TypeFP32(), xLut, lutIdx);
    var yLutValue = context.VectorExtractDynamic(context.TypeFP32(), yLut, lutIdx);

    var xResult = context.FMul(context.TypeFP32(), x, xLutValue);
    var yResult = context.FMul(context.TypeFP32(), y, yLutValue);
    var result = context.FAdd(context.TypeFP32(), xResult, yResult);

    return new OperationResult(AggregateType.FP32, result);
}

/// <summary>
/// Emits a texture fetch, gather or sample instruction for a texture operation.
/// Source operands are consumed in a fixed order: optional sampler index, coordinates
/// (with the array layer last), optional depth reference, optional derivatives,
/// optional sample index or LOD level, optional offset(s), optional LOD bias and
/// optional gather component.
/// Bindless texture operations are not supported yet and yield a zero result.
/// </summary>
private static OperationResult GenerateTextureSample(CodeGenContext context, AstOperation operation)
{
    AstTextureOperation texOp = (AstTextureOperation)operation;

    bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
    bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
    bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
    bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
    bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
    bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
    bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
    bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;

    bool isArray = (texOp.Type & SamplerType.Array) != 0;
    bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
    bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
    bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;

    bool colorIsVector = isGather || !isShadow;

    // TODO: Bindless texture support. For now we just return 0.
    if (isBindless)
    {
        return GetZeroOperationResult(context, texOp, AggregateType.FP32, colorIsVector);
    }

    // This combination is valid, but not available on GLSL.
    // For now, ignore the LOD level and do a normal sample.
    // TODO: How to implement it properly?
    if (hasLodLevel && isArray && isShadow)
    {
        hasLodLevel = false;
    }

    // isBindless is always false past the early return above, so this currently starts at 0.
    int srcIndex = isBindless ? 1 : 0;

    // Fetches the next unread source operand as the requested type.
    SpvInstruction Src(AggregateType type)
    {
        return context.Get(type, texOp.GetSource(srcIndex++));
    }

    if (isIndexed)
    {
        // The index operand is never read below, but it still has to be consumed
        // so that the following operands are fetched from the right positions.
        _ = Src(AggregateType.S32);
    }

    int coordsCount = texOp.Type.GetDimensions();

    int pCount = coordsCount;

    int arrayIndexElem = -1;

    if (isArray)
    {
        // The array layer is appended after the spatial coordinates.
        arrayIndexElem = pCount++;
    }

    AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32;

    // Builds the coordinate vector (or scalar when count is 1) from the next operands.
    SpvInstruction AssemblePVector(int count)
    {
        if (count > 1)
        {
            SpvInstruction[] elems = new SpvInstruction[count];

            for (int i = 0; i < count; i++)
            {
                if (arrayIndexElem == i)
                {
                    // The array layer is always read as an integer and converted
                    // when the rest of the coordinates are floating point.
                    elems[i] = Src(AggregateType.S32);

                    if (!intCoords)
                    {
                        elems[i] = context.ConvertSToF(context.TypeFP32(), elems[i]);
                    }
                }
                else
                {
                    elems[i] = Src(coordType);
                }
            }

            var vectorType = context.TypeVector(intCoords ? context.TypeS32() : context.TypeFP32(), count);
            return context.CompositeConstruct(vectorType, elems);
        }
        else
        {
            return Src(coordType);
        }
    }

    // Offsets gather coordinates by 1 / (size * 2^(precision + 1)) when the host reports
    // a non-zero gather bias precision; otherwise returns the vector unchanged.
    SpvInstruction ApplyBias(SpvInstruction vector, SpvInstruction image)
    {
        int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();
        if (isGather && gatherBiasPrecision != 0)
        {
            // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels.
            // Offset by the gather precision divided by 2 to correct for rounding.
            var sizeType = pCount == 1 ? context.TypeS32() : context.TypeVector(context.TypeS32(), pCount);
            var pVectorType = pCount == 1 ? context.TypeFP32() : context.TypeVector(context.TypeFP32(), pCount);

            var bias = context.Constant(context.TypeFP32(), (float)(1 << (gatherBiasPrecision + 1)));
            var biasVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(bias, pCount).ToArray());

            var one = context.Constant(context.TypeFP32(), 1f);
            var oneVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(one, pCount).ToArray());

            var divisor = context.FMul(
                pVectorType,
                context.ConvertSToF(pVectorType, context.ImageQuerySize(sizeType, image)),
                biasVector);

            vector = context.FAdd(pVectorType, vector, context.FDiv(pVectorType, oneVector, divisor));
        }

        return vector;
    }

    SpvInstruction pCoords = AssemblePVector(pCount);
    pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount);

    // Builds one FP32 derivative vector (or scalar when count is 1) from the next operands.
    SpvInstruction AssembleDerivativesVector(int count)
    {
        if (count > 1)
        {
            SpvInstruction[] elems = new SpvInstruction[count];

            for (int i = 0; i < count; i++)
            {
                elems[i] = Src(AggregateType.FP32);
            }

            var vectorType = context.TypeVector(context.TypeFP32(), count);
            return context.CompositeConstruct(vectorType, elems);
        }
        else
        {
            return Src(AggregateType.FP32);
        }
    }

    SpvInstruction dRef = null;

    if (isShadow)
    {
        dRef = Src(AggregateType.FP32);
    }

    SpvInstruction[] derivatives = null;

    if (hasDerivatives)
    {
        derivatives = new[]
        {
            AssembleDerivativesVector(coordsCount), // dPdx
            AssembleDerivativesVector(coordsCount)  // dPdy
        };
    }

    SpvInstruction sample = null;
    SpvInstruction lod = null;

    if (isMultisample)
    {
        sample = Src(AggregateType.S32);
    }
    else if (hasLodLevel)
    {
        lod = Src(coordType);
    }

    // Builds one S32 offset vector (or scalar when count is 1) from the next operands.
    SpvInstruction AssembleOffsetVector(int count)
    {
        if (count > 1)
        {
            SpvInstruction[] elems = new SpvInstruction[count];

            for (int i = 0; i < count; i++)
            {
                elems[i] = Src(AggregateType.S32);
            }

            var vectorType = context.TypeVector(context.TypeS32(), count);

            return context.ConstantComposite(vectorType, elems);
        }
        else
        {
            return Src(AggregateType.S32);
        }
    }

    SpvInstruction[] offsets = null;

    if (hasOffset)
    {
        offsets = new[] { AssembleOffsetVector(coordsCount) };
    }
    else if (hasOffsets)
    {
        offsets = new[]
        {
            AssembleOffsetVector(coordsCount),
            AssembleOffsetVector(coordsCount),
            AssembleOffsetVector(coordsCount),
            AssembleOffsetVector(coordsCount)
        };
    }

    SpvInstruction lodBias = null;

    if (hasLodBias)
    {
        lodBias = Src(AggregateType.FP32);
    }

    SpvInstruction compIdx = null;

    // textureGather* optional extra component index,
    // not needed for shadow samplers.
    if (isGather && !isShadow)
    {
        compIdx = Src(AggregateType.S32);
    }

    // The optional image operands are appended in the order
    // Bias, Lod, Grad, ConstOffset/ConstOffsets, Sample.
    var operandsList = new List<SpvInstruction>();
    var operandsMask = ImageOperandsMask.MaskNone;

    if (hasLodBias)
    {
        operandsMask |= ImageOperandsMask.Bias;
        operandsList.Add(lodBias);
    }

    if (!isMultisample && hasLodLevel)
    {
        operandsMask |= ImageOperandsMask.Lod;
        operandsList.Add(lod);
    }

    if (hasDerivatives)
    {
        operandsMask |= ImageOperandsMask.Grad;
        operandsList.Add(derivatives[0]);
        operandsList.Add(derivatives[1]);
    }

    if (hasOffset)
    {
        operandsMask |= ImageOperandsMask.ConstOffset;
        operandsList.Add(offsets[0]);
    }
    else if (hasOffsets)
    {
        operandsMask |= ImageOperandsMask.ConstOffsets;
        SpvInstruction arrayv2 = context.TypeArray(context.TypeVector(context.TypeS32(), 2), context.Constant(context.TypeU32(), 4));
        operandsList.Add(context.ConstantComposite(arrayv2, offsets[0], offsets[1], offsets[2], offsets[3]));
    }

    if (isMultisample)
    {
        operandsMask |= ImageOperandsMask.Sample;
        operandsList.Add(sample);
    }

    var resultType = colorIsVector ? context.TypeVector(context.TypeFP32(), 4) : context.TypeFP32();

    var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format);

    (var imageType, var sampledImageType, var sampledImageVariable) = context.Samplers[meta];

    var image = context.Load(sampledImageType, sampledImageVariable);

    if (intCoords)
    {
        // Integer coordinates go through ImageFetch below, which is given the
        // image extracted from the loaded sampled image.
        image = context.Image(imageType, image);
    }

    pCoords = ApplyBias(pCoords, image);

    var operands = operandsList.ToArray();

    SpvInstruction result;

    if (intCoords)
    {
        result = context.ImageFetch(resultType, image, pCoords, operandsMask, operands);
    }
    else if (isGather)
    {
        if (isShadow)
        {
            result = context.ImageDrefGather(resultType, image, pCoords, dRef, operandsMask, operands);
        }
        else
        {
            result = context.ImageGather(resultType, image, pCoords, compIdx, operandsMask, operands);
        }
    }
    else if (isShadow)
    {
        if (hasLodLevel)
        {
            result = context.ImageSampleDrefExplicitLod(resultType, image, pCoords, dRef, operandsMask, operands);
        }
        else
        {
            result = context.ImageSampleDrefImplicitLod(resultType, image, pCoords, dRef, operandsMask, operands);
        }
    }
    else if (hasDerivatives || hasLodLevel)
    {
        result = context.ImageSampleExplicitLod(resultType, image, pCoords, operandsMask, operands);
    }
    else
    {
        result = context.ImageSampleImplicitLod(resultType, image, pCoords, operandsMask, operands);
    }

    var swizzledResultType = AggregateType.FP32;

    if (colorIsVector)
    {
        swizzledResultType = texOp.GetVectorType(swizzledResultType);

        result = GetSwizzledResult(context, result, swizzledResultType, texOp.Index);
    }

    return new OperationResult(swizzledResultType, result);
}

/// <summary>
/// Emits a texture size query. When texOp.Index is 3 the number of mip levels is
/// returned; otherwise the size component selected by texOp.Index is returned.
/// Bindless texture operations are not supported yet and yield a constant 0.
/// </summary>
private static OperationResult GenerateTextureSize(CodeGenContext context, AstOperation operation)
{
    AstTextureOperation texOp = (AstTextureOperation)operation;

    bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;

    // TODO: Bindless texture support. For now we just return 0.
    if (isBindless)
    {
        return new OperationResult(AggregateType.S32, context.Constant(context.TypeS32(), 0));
    }

    bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;

    if (isIndexed)
    {
        // The value is not read below; the call is kept so that whatever
        // context.GetS32 emits for this operand is unchanged.
        _ = context.GetS32(texOp.GetSource(0));
    }

    var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format);

    (var imageType, var sampledImageType, var sampledImageVariable) = context.Samplers[meta];

    var image = context.Load(sampledImageType, sampledImageVariable);
    image = context.Image(imageType, image);

    if (texOp.Index == 3)
    {
        return new OperationResult(AggregateType.S32, context.ImageQueryLevels(context.TypeS32(), image));
    }
    else
    {
        var type = context.SamplersTypes[meta];

        // Multisample and buffer textures are queried without a LOD operand.
        bool hasLod = !type.HasFlag(SamplerType.Multisample) && type != SamplerType.TextureBuffer;

        // Cube textures are queried with 2 size components.
        int dimensions = (type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : type.GetDimensions();

        if (type.HasFlag(SamplerType.Array))
        {
            dimensions++;
        }

        var resultType = dimensions == 1 ? context.TypeS32() : context.TypeVector(context.TypeS32(), dimensions);

        SpvInstruction result;

        if (hasLod)
        {
            // The LOD operand follows the sampler index operand, when there is one.
            int lodSrcIndex = isBindless || isIndexed ? 1 : 0;
            var lod = context.GetS32(operation.GetSource(lodSrcIndex));
            result = context.ImageQuerySizeLod(resultType, image, lod);
        }
        else
        {
            result = context.ImageQuerySize(resultType, image);
        }

        if (dimensions != 1)
        {
            result = context.CompositeExtract(context.TypeS32(), result, (SpvLiteralInteger)texOp.Index);
        }

        // Unscaling is applied to components 0 and 1, and to every component of 3D textures.
        if (texOp.Index < 2 || (type & SamplerType.Mask) == SamplerType.Texture3D)
        {
            result = ScalingHelpers.ApplyUnscaling(context, texOp.WithType(type), result, isBindless, isIndexed);
        }

        return new OperationResult(AggregateType.S32, result);
    }
}

/// <summary>
/// Forwards to GenerateUnary with GlslTrunc as the first emitter and no second emitter.
/// </summary>
private static OperationResult GenerateTruncate(CodeGenContext context, AstOperation operation)
{
    return GenerateUnary(context, operation, context.Delegates.GlslTrunc, null);
}

/// <summary>
/// Unpacks source 0 (as FP64) into a 2-component U32 vector and returns the
/// component selected by operation.Index.
/// </summary>
private static OperationResult GenerateUnpackDouble2x32(CodeGenContext context, AstOperation operation)
{
    var value = context.GetFP64(operation.GetSource(0));
    var vector = context.GlslUnpackDouble2x32(context.TypeVector(context.TypeU32(), 2), value);
    var result = context.CompositeExtract(context.TypeU32(), vector, operation.Index);

    return new OperationResult(AggregateType.U32, result);
}

/// <summary>
/// Unpacks source 0 (as U32) into a 2-component FP32 vector and returns the
/// component selected by operation.Index.
/// </summary>
private static OperationResult GenerateUnpackHalf2x16(CodeGenContext context, AstOperation operation)
{
    var value = context.GetU32(operation.GetSource(0));
    var vector = context.GlslUnpackHalf2x16(context.TypeVector(context.TypeFP32(), 2), value);
    var result = context.CompositeExtract(context.TypeFP32(), vector, operation.Index);

    return new OperationResult(AggregateType.FP32, result);
}

/// <summary>
/// Extracts one element of the vector in source 0. A constant index operand uses
/// CompositeExtract; any other index uses VectorExtractDynamic.
/// </summary>
private static OperationResult GenerateVectorExtract(CodeGenContext context, AstOperation operation)
{
    var vector = context.GetWithType(operation.GetSource(0), out AggregateType vectorType);

    // Clearing the element count bits leaves the scalar element type.
    var scalarType = vectorType & ~AggregateType.ElementCountMask;
    var resultType = context.GetType(scalarType);
    SpvInstruction result;

    if (operation.GetSource(1) is AstOperand indexOperand && indexOperand.Type == OperandType.Constant)
    {
        result = context.CompositeExtract(resultType, vector, (SpvLiteralInteger)indexOperand.Value);
    }
    else
    {
        var index = context.Get(AggregateType.S32, operation.GetSource(1));
        result = context.VectorExtractDynamic(resultType, vector, index);
    }

    return new OperationResult(scalarType, result);
}

/// <summary>
/// Emits GroupNonUniformAll with Subgroup scope on source 0 (as Bool).
/// </summary>
private static OperationResult GenerateVoteAll(CodeGenContext context, AstOperation operation)
{
    var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
    var result = context.GroupNonUniformAll(context.TypeBool(), execution, context.Get(AggregateType.Bool, operation.GetSource(0)));
    return new OperationResult(AggregateType.Bool, result);
}

/// <summary>
/// Emits GroupNonUniformAllEqual with Subgroup scope on source 0 (as Bool).
/// </summary>
private static OperationResult GenerateVoteAllEqual(CodeGenContext context, AstOperation operation)
{
    var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
    var result = context.GroupNonUniformAllEqual(context.TypeBool(), execution, context.Get(AggregateType.Bool, operation.GetSource(0)));
    return new OperationResult(AggregateType.Bool, result);
}

/// <summary>
/// Emits GroupNonUniformAny with Subgroup scope on source 0 (as Bool).
/// </summary>
private static OperationResult GenerateVoteAny(CodeGenContext context, AstOperation operation)
{
    var execution = context.Constant(context.TypeU32(), Scope.Subgroup);
    var result = context.GroupNonUniformAny(context.TypeBool(), execution, context.Get(AggregateType.Bool, operation.GetSource(0)));
    return new OperationResult(AggregateType.Bool, result);
}

/// <summary>
/// Emits a comparison of sources 0 and 1 producing a Bool. The operands are read as
/// FP64 or FP32 (using <paramref name="emitF"/>) when the instruction carries the
/// matching flag, and as S32 (using <paramref name="emitI"/>) otherwise.
/// </summary>
private static OperationResult GenerateCompare(
    CodeGenContext context,
    AstOperation operation,
    Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF,
    Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI)
{
    var src1 = operation.GetSource(0);
    var src2 = operation.GetSource(1);

    SpvInstruction result;

    if (operation.Inst.HasFlag(Instruction.FP64))
    {
        result = emitF(context.TypeBool(), context.GetFP64(src1), context.GetFP64(src2));
    }
    else if (operation.Inst.HasFlag(Instruction.FP32))
    {
        result = emitF(context.TypeBool(), context.GetFP32(src1), context.GetFP32(src2));
    }
    else
    {
        result = emitI(context.TypeBool(), context.GetS32(src1), context.GetS32(src2));
    }

    return new OperationResult(AggregateType.Bool, result);
}

/// <summary>
/// Emits a comparison of sources 0 and 1, both read as U32, producing a Bool.
/// </summary>
private static OperationResult GenerateCompareU32(
    CodeGenContext context,
    AstOperation operation,
    Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU)
{
    var src1 = operation.GetSource(0);
    var src2 = operation.GetSource(1);

    var result = emitU(context.TypeBool(), context.GetU32(src1), context.GetU32(src2));

    return new OperationResult(AggregateType.Bool, result);
}

/// <summary>
/// Emits an atomic read-modify-write with source 2 (as U32) as the value, on either a
/// storage buffer element or a shared memory element addressed by source 0.
/// </summary>
/// <exception cref="InvalidOperationException">The operation's storage kind is neither StorageBuffer nor SharedMemory.</exception>
private static OperationResult GenerateAtomicMemoryBinary(
    CodeGenContext context,
    AstOperation operation,
    Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU)
{
    var value = context.GetU32(operation.GetSource(2));

    SpvInstruction elemPointer;

    if (operation.StorageKind == StorageKind.StorageBuffer)
    {
        elemPointer = GetStorageElemPointer(context, operation);
    }
    else if (operation.StorageKind == StorageKind.SharedMemory)
    {
        var offset = context.GetU32(operation.GetSource(0));
        elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset);
    }
    else
    {
        throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
    }

    // The constants 1 and 0 are passed as the two operands between the pointer and the value.
    var one = context.Constant(context.TypeU32(), 1);
    var zero = context.Constant(context.TypeU32(), 0);

    return new OperationResult(AggregateType.U32, emitU(context.TypeU32(), elemPointer, one, zero, value));
}

/// <summary>
/// Emits an atomic compare-exchange on either a storage buffer element or a shared
/// memory element addressed by source 0. Source 3 is passed before source 2 to
/// AtomicCompareExchange.
/// </summary>
/// <exception cref="InvalidOperationException">The operation's storage kind is neither StorageBuffer nor SharedMemory.</exception>
private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation)
{
    var value0 = context.GetU32(operation.GetSource(2));
    var value1 = context.GetU32(operation.GetSource(3));

    SpvInstruction elemPointer;

    if (operation.StorageKind == StorageKind.StorageBuffer)
    {
        elemPointer = GetStorageElemPointer(context, operation);
    }
    else if (operation.StorageKind == StorageKind.SharedMemory)
    {
        var offset = context.GetU32(operation.GetSource(0));
        elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset);
    }
    else
    {
        throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
    }

    var one = context.Constant(context.TypeU32(), 1);
    var zero = context.Constant(context.TypeU32(), 0);

    return new OperationResult(AggregateType.U32, context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, value1, value0));
}

/// <summary>
/// Emits a load from, or a store to, an input/output variable. The first source must be
/// a constant operand identifying the IO variable; for per-location variables the second
/// source must be a constant location, optionally followed by a constant component.
/// Any remaining sources (excluding the stored value) are used as access chain indices.
/// </summary>
/// <param name="context">Code generation context.</param>
/// <param name="operation">Operation to generate code for.</param>
/// <param name="isStore">True to store the last source through the pointer, false to load from it.</param>
/// <returns>The loaded value, or OperationResult.Invalid for stores.</returns>
/// <exception cref="InvalidOperationException">
/// The storage kind is not an input/output kind, or a required constant operand is missing.
/// </exception>
private static OperationResult GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
{
    StorageKind storageKind = operation.StorageKind;

    SpvInstruction pointer;
    AggregateType varType;
    int srcIndex = 0;

    switch (storageKind)
    {
        case StorageKind.Input:
        case StorageKind.InputPerPatch:
        case StorageKind.Output:
        case StorageKind.OutputPerPatch:
            if (!(operation.GetSource(srcIndex++) is AstOperand varId) || varId.Type != OperandType.Constant)
            {
                throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
            }

            IoVariable ioVariable = (IoVariable)varId.Value;
            bool isOutput = storageKind.IsOutput();
            bool isPerPatch = storageKind.IsPerPatch();
            int location = 0;
            int component = 0;

            if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
            {
                if (!(operation.GetSource(srcIndex++) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant)
                {
                    throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand.");
                }

                location = vecIndex.Value;

                // The component operand is only consumed when it is a constant and the
                // configuration reports a per-component variable for this location.
                if (operation.SourcesCount > srcIndex &&
                    operation.GetSource(srcIndex) is AstOperand elemIndex &&
                    elemIndex.Type == OperandType.Constant &&
                    context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput))
                {
                    component = elemIndex.Value;
                    srcIndex++;
                }
            }

            if (ioVariable == IoVariable.UserDefined)
            {
                varType = context.Config.GetUserDefinedType(location, isOutput);
            }
            else if (ioVariable == IoVariable.FragmentOutputColor)
            {
                varType = context.Config.GetFragmentOutputColorType(location);
            }
            else if (ioVariable == IoVariable.FragmentOutputIsBgra)
            {
                // Read from member 1 of the support buffer rather than an IO variable.
                var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeU32());
                var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 1), elemIndex);
                varType = AggregateType.U32;

                break;
            }
            else if (ioVariable == IoVariable.SupportBlockRenderScale)
            {
                // Read from member 4 of the support buffer rather than an IO variable.
                var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32());
                var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 4), elemIndex);
                varType = AggregateType.FP32;

                break;
            }
            else if (ioVariable == IoVariable.SupportBlockViewInverse)
            {
                // Read from member 2 of the support buffer rather than an IO variable.
                var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32());
                var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 2), elemIndex);
                varType = AggregateType.FP32;

                break;
            }
            else
            {
                (_, varType) = IoMap.GetSpirvBuiltIn(ioVariable);
            }

            // Only the element type is kept, as the pointer addresses a single element.
            varType &= AggregateType.ElementTypeMask;

            // Remaining sources are access chain indices; for stores the last source is the value.
            int inputsCount = (isStore ? operation.SourcesCount - 1 : operation.SourcesCount) - srcIndex;
            var storageClass = isOutput ? StorageClass.Output : StorageClass.Input;

            var ioDefinition = new IoDefinition(storageKind, ioVariable, location, component);
            var dict = isPerPatch
                ? (isOutput ? context.OutputsPerPatch : context.InputsPerPatch)
                : (isOutput ? context.Outputs : context.Inputs);

            SpvInstruction baseObj = dict[ioDefinition];
            SpvInstruction e0, e1, e2;

            switch (inputsCount)
            {
                case 0:
                    pointer = baseObj;
                    break;
                case 1:
                    e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                    pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0);
                    break;
                case 2:
                    e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                    e1 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                    pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0, e1);
                    break;
                case 3:
                    e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                    e1 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                    e2 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
                    pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0, e1, e2);
                    break;
                default:
                    var indexes = new SpvInstruction[inputsCount];
                    int index = 0;

                    for (; index < inputsCount; srcIndex++, index++)
                    {
                        indexes[index] = context.Get(AggregateType.S32, operation.GetSource(srcIndex));
                    }

                    pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, indexes);
                    break;
            }
            break;

        default:
            throw new InvalidOperationException($"Invalid storage kind {storageKind}.");
    }

    if (isStore)
    {
        // After the index operands have been consumed, srcIndex points at the value to store.
        context.Store(pointer, context.Get(varType, operation.GetSource(srcIndex)));
        return OperationResult.Invalid;
    }
    else
    {
        var result = context.Load(context.GetType(varType), pointer);
        return new OperationResult(varType, result);
    }
}

private static SpvInstruction
GetScalarInput(CodeGenContext context, IoVariable ioVariable) + { + (_, var varType) = IoMap.GetSpirvBuiltIn(ioVariable); + varType &= AggregateType.ElementTypeMask; + + var ioDefinition = new IoDefinition(StorageKind.Input, ioVariable); + + return context.Load(context.GetType(varType), context.Inputs[ioDefinition]); + } + + private static void GenerateStoreSharedSmallInt(CodeGenContext context, AstOperation operation, int bitSize) + { + var offset = context.Get(AggregateType.U32, operation.GetSource(0)); + var value = context.Get(AggregateType.U32, operation.GetSource(1)); + + var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2)); + var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3)); + bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3)); + + var memory = context.SharedMemory; + + var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), memory, wordOffset); + + GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); + } + + private static void GenerateStoreStorageSmallInt(CodeGenContext context, AstOperation operation, int bitSize) + { + var i0 = context.Get(AggregateType.S32, operation.GetSource(0)); + var offset = context.Get(AggregateType.U32, operation.GetSource(1)); + var value = context.Get(AggregateType.U32, operation.GetSource(2)); + + var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2)); + var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3)); + bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3)); + + var sbVariable = context.StorageBuffersArray; + + var i1 = context.Constant(context.TypeS32(), 0); + + var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, 
context.TypeU32()), sbVariable, i0, i1, wordOffset); + + GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); + } + + private static void GenerateStoreSmallInt( + CodeGenContext context, + SpvInstruction elemPointer, + SpvInstruction bitOffset, + SpvInstruction value, + int bitSize) + { + var loopStart = context.Label(); + var loopEnd = context.Label(); + + context.Branch(loopStart); + context.AddLabel(loopStart); + + var oldValue = context.Load(context.TypeU32(), elemPointer); + var newValue = context.BitFieldInsert(context.TypeU32(), oldValue, value, bitOffset, context.Constant(context.TypeU32(), bitSize)); + + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + var result = context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, newValue, oldValue); + var failed = context.INotEqual(context.TypeBool(), result, oldValue); + + context.LoopMerge(loopEnd, loopStart, LoopControlMask.MaskNone); + context.BranchConditional(failed, loopStart, loopEnd); + + context.AddLabel(loopEnd); + } + + private static OperationResult GetZeroOperationResult( + CodeGenContext context, + AstTextureOperation texOp, + AggregateType scalarType, + bool isVector) + { + var zero = scalarType switch + { + AggregateType.S32 => context.Constant(context.TypeS32(), 0), + AggregateType.U32 => context.Constant(context.TypeU32(), 0u), + _ => context.Constant(context.TypeFP32(), 0f), + }; + + if (isVector) + { + AggregateType outputType = texOp.GetVectorType(scalarType); + + if ((outputType & AggregateType.ElementCountMask) != 0) + { + int componentsCount = BitOperations.PopCount((uint)texOp.Index); + + SpvInstruction[] values = new SpvInstruction[componentsCount]; + + values.AsSpan().Fill(zero); + + return new OperationResult(outputType, context.ConstantComposite(context.GetType(outputType), values)); + } + } + + return new OperationResult(scalarType, zero); + } + + private static SpvInstruction 
GetSwizzledResult(CodeGenContext context, SpvInstruction vector, AggregateType swizzledResultType, int mask) + { + if ((swizzledResultType & AggregateType.ElementCountMask) != 0) + { + SpvLiteralInteger[] components = new SpvLiteralInteger[BitOperations.PopCount((uint)mask)]; + + int componentIndex = 0; + + for (int i = 0; i < 4; i++) + { + if ((mask & (1 << i)) != 0) + { + components[componentIndex++] = i; + } + } + + return context.VectorShuffle(context.GetType(swizzledResultType), vector, vector, components); + } + else + { + int componentIndex = (int)BitOperations.TrailingZeroCount(mask); + + return context.CompositeExtract(context.GetType(swizzledResultType), vector, (SpvLiteralInteger)componentIndex); + } + } + + private static SpvInstruction GetStorageElemPointer(CodeGenContext context, AstOperation operation) + { + var sbVariable = context.StorageBuffersArray; + var i0 = context.Get(AggregateType.S32, operation.GetSource(0)); + var i1 = context.Constant(context.TypeS32(), 0); + var i2 = context.Get(AggregateType.S32, operation.GetSource(1)); + + return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, i2); + } + + private static OperationResult GenerateUnary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var source = operation.GetSource(0); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + return new OperationResult(AggregateType.FP64, emitF(context.TypeFP64(), context.GetFP64(source))); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + return new OperationResult(AggregateType.FP32, emitF(context.TypeFP32(), context.GetFP32(source))); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(source))); + } + } + + private static OperationResult GenerateUnaryBool( + CodeGenContext context, + AstOperation 
operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitB) + { + var source = operation.GetSource(0); + return new OperationResult(AggregateType.Bool, emitB(context.TypeBool(), context.Get(AggregateType.Bool, source))); + } + + private static OperationResult GenerateUnaryFP32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emit) + { + var source = operation.GetSource(0); + return new OperationResult(AggregateType.FP32, emit(context.TypeFP32(), context.GetFP32(source))); + } + + private static OperationResult GenerateUnaryS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var source = operation.GetSource(0); + return new OperationResult(AggregateType.S32, emitS(context.TypeS32(), context.GetS32(source))); + } + + private static OperationResult GenerateBinary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + var result = emitF(context.TypeFP64(), context.GetFP64(src1), context.GetFP64(src2)); + + if (!context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + context.Decorate(result, Decoration.NoContraction); + } + + return new OperationResult(AggregateType.FP64, result); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + var result = emitF(context.TypeFP32(), context.GetFP32(src1), context.GetFP32(src2)); + + if (!context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + context.Decorate(result, Decoration.NoContraction); + } + + return new OperationResult(AggregateType.FP32, result); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(src1), 
context.GetS32(src2))); + } + } + + private static OperationResult GenerateBinaryBool( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitB) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.Bool, emitB(context.TypeBool(), context.Get(AggregateType.Bool, src1), context.Get(AggregateType.Bool, src2))); + } + + private static OperationResult GenerateBinaryS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.S32, emitS(context.TypeS32(), context.GetS32(src1), context.GetS32(src2))); + } + + private static OperationResult GenerateBinaryU32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.U32, emitU(context.TypeU32(), context.GetU32(src1), context.GetU32(src2))); + } + + private static OperationResult GenerateTernary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + var result = emitF(context.TypeFP64(), context.GetFP64(src1), context.GetFP64(src2), context.GetFP64(src3)); + + if (!context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + context.Decorate(result, Decoration.NoContraction); + } + + return new OperationResult(AggregateType.FP64, result); + } + else 
if (operation.Inst.HasFlag(Instruction.FP32)) + { + var result = emitF(context.TypeFP32(), context.GetFP32(src1), context.GetFP32(src2), context.GetFP32(src3)); + + if (!context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + context.Decorate(result, Decoration.NoContraction); + } + + return new OperationResult(AggregateType.FP32, result); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(src1), context.GetS32(src2), context.GetS32(src3))); + } + } + + private static OperationResult GenerateTernaryU32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + return new OperationResult(AggregateType.U32, emitU( + context.TypeU32(), + context.GetU32(src1), + context.GetU32(src2), + context.GetU32(src3))); + } + + private static OperationResult GenerateBitfieldExtractS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + return new OperationResult(AggregateType.S32, emitS( + context.TypeS32(), + context.GetS32(src1), + context.GetU32(src2), + context.GetU32(src3))); + } + + private static OperationResult GenerateBitfieldInsert( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + var src4 = operation.GetSource(3); + + return new OperationResult(AggregateType.U32, emitS( + context.TypeU32(), + context.GetU32(src1), + context.GetU32(src2), + context.GetU32(src3), + 
context.GetU32(src4))); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs new file mode 100644 index 00000000..d2ff0085 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs @@ -0,0 +1,86 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + static class IoMap + { + // At least 16 attributes are guaranteed by the spec. + private const int MaxAttributes = 16; + + public static (BuiltIn, AggregateType) GetSpirvBuiltIn(IoVariable ioVariable) + { + return ioVariable switch + { + IoVariable.BaseInstance => (BuiltIn.BaseInstance, AggregateType.S32), + IoVariable.BaseVertex => (BuiltIn.BaseVertex, AggregateType.S32), + IoVariable.ClipDistance => (BuiltIn.ClipDistance, AggregateType.Array | AggregateType.FP32), + IoVariable.CtaId => (BuiltIn.WorkgroupId, AggregateType.Vector3 | AggregateType.U32), + IoVariable.DrawIndex => (BuiltIn.DrawIndex, AggregateType.S32), + IoVariable.FragmentCoord => (BuiltIn.FragCoord, AggregateType.Vector4 | AggregateType.FP32), + IoVariable.FragmentOutputDepth => (BuiltIn.FragDepth, AggregateType.FP32), + IoVariable.FrontFacing => (BuiltIn.FrontFacing, AggregateType.Bool), + IoVariable.InstanceId => (BuiltIn.InstanceId, AggregateType.S32), + IoVariable.InstanceIndex => (BuiltIn.InstanceIndex, AggregateType.S32), + IoVariable.InvocationId => (BuiltIn.InvocationId, AggregateType.S32), + IoVariable.Layer => (BuiltIn.Layer, AggregateType.S32), + IoVariable.PatchVertices => (BuiltIn.PatchVertices, AggregateType.S32), + IoVariable.PointCoord => (BuiltIn.PointCoord, AggregateType.Vector2 | AggregateType.FP32), + IoVariable.PointSize => (BuiltIn.PointSize, AggregateType.FP32), + IoVariable.Position => (BuiltIn.Position, AggregateType.Vector4 | AggregateType.FP32), + IoVariable.PrimitiveId => (BuiltIn.PrimitiveId, 
AggregateType.S32), + IoVariable.SubgroupEqMask => (BuiltIn.SubgroupEqMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupGeMask => (BuiltIn.SubgroupGeMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupGtMask => (BuiltIn.SubgroupGtMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupLaneId => (BuiltIn.SubgroupLocalInvocationId, AggregateType.U32), + IoVariable.SubgroupLeMask => (BuiltIn.SubgroupLeMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupLtMask => (BuiltIn.SubgroupLtMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.TessellationCoord => (BuiltIn.TessCoord, AggregateType.Vector3 | AggregateType.FP32), + IoVariable.TessellationLevelInner => (BuiltIn.TessLevelInner, AggregateType.Array | AggregateType.FP32), + IoVariable.TessellationLevelOuter => (BuiltIn.TessLevelOuter, AggregateType.Array | AggregateType.FP32), + IoVariable.ThreadId => (BuiltIn.LocalInvocationId, AggregateType.Vector3 | AggregateType.U32), + IoVariable.ThreadKill => (BuiltIn.HelperInvocation, AggregateType.Bool), + IoVariable.VertexId => (BuiltIn.VertexId, AggregateType.S32), + IoVariable.VertexIndex => (BuiltIn.VertexIndex, AggregateType.S32), + IoVariable.ViewportIndex => (BuiltIn.ViewportIndex, AggregateType.S32), + IoVariable.ViewportMask => (BuiltIn.ViewportMaskNV, AggregateType.Array | AggregateType.S32), + _ => (default, AggregateType.Invalid) + }; + } + + public static int GetSpirvBuiltInArrayLength(IoVariable ioVariable) + { + return ioVariable switch + { + IoVariable.ClipDistance => 8, + IoVariable.TessellationLevelInner => 2, + IoVariable.TessellationLevelOuter => 4, + IoVariable.ViewportMask => 1, + IoVariable.UserDefined => MaxAttributes, + _ => 1 + }; + } + + public static bool IsPerVertex(IoVariable ioVariable, ShaderStage stage, bool isOutput) + { + switch (ioVariable) + { + case IoVariable.Layer: + case IoVariable.ViewportIndex: + case IoVariable.PointSize: + case IoVariable.Position: + 
case IoVariable.UserDefined: + case IoVariable.ClipDistance: + case IoVariable.PointCoord: + case IoVariable.ViewportMask: + return !isOutput && + (stage == ShaderStage.TessellationControl || + stage == ShaderStage.TessellationEvaluation || + stage == ShaderStage.Geometry); + } + + return false; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs new file mode 100644 index 00000000..f80c8110 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs @@ -0,0 +1,19 @@ +using Ryujinx.Graphics.Shader.Translation; +using Spv.Generator; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + readonly struct OperationResult + { + public static OperationResult Invalid => new OperationResult(AggregateType.Invalid, null); + + public AggregateType Type { get; } + public Instruction Value { get; } + + public OperationResult(AggregateType type, Instruction value) + { + Type = type; + Value = value; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs new file mode 100644 index 00000000..f6c218c6 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs @@ -0,0 +1,227 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using SpvInstruction = Spv.Generator.Instruction; + + static class ScalingHelpers + { + public static SpvInstruction ApplyScaling( + CodeGenContext context, + AstTextureOperation texOp, + SpvInstruction vector, + bool intCoords, + bool isBindless, + bool isIndexed, + bool isArray, + int pCount) + { + if (intCoords) + { + if (context.Config.Stage.SupportsRenderScale() && + !isBindless && + !isIndexed) + { + int index = texOp.Inst == Instruction.ImageLoad + ? 
context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp) + : context.Config.FindTextureDescriptorIndex(texOp); + + if (pCount == 3 && isArray) + { + return ApplyScaling2DArray(context, vector, index); + } + else if (pCount == 2 && !isArray) + { + return ApplyScaling2D(context, vector, index); + } + } + } + + return vector; + } + + private static SpvInstruction ApplyScaling2DArray(CodeGenContext context, SpvInstruction vector, int index) + { + // The array index is not scaled, just x and y. + var vectorXY = context.VectorShuffle(context.TypeVector(context.TypeS32(), 2), vector, vector, 0, 1); + var vectorZ = context.CompositeExtract(context.TypeS32(), vector, 2); + var vectorXYScaled = ApplyScaling2D(context, vectorXY, index); + var vectorScaled = context.CompositeConstruct(context.TypeVector(context.TypeS32(), 3), vectorXYScaled, vectorZ); + + return vectorScaled; + } + + private static SpvInstruction ApplyScaling2D(CodeGenContext context, SpvInstruction vector, int index) + { + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32()); + var fieldIndex = context.Constant(context.TypeU32(), 4); + var scaleIndex = context.Constant(context.TypeU32(), index); + + if (context.Config.Stage == ShaderStage.Vertex) + { + var scaleCountPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32()); + var scaleCountElemPointer = context.AccessChain(scaleCountPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3)); + var scaleCount = context.Load(context.TypeS32(), scaleCountElemPointer); + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, scaleCount); + } + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, context.Constant(context.TypeU32(), 1)); + + var scaleElemPointer = context.AccessChain(pointerType, context.SupportBuffer, fieldIndex, scaleIndex); + var scale = context.Load(context.TypeFP32(), scaleElemPointer); + + var ivector2Type = 
context.TypeVector(context.TypeS32(), 2); + var localVector = context.CoordTemp; + + var passthrough = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f)); + + var mergeLabel = context.Label(); + + if (context.Config.Stage == ShaderStage.Fragment) + { + var scaledInterpolatedLabel = context.Label(); + var scaledNoInterpolationLabel = context.Label(); + + var needsInterpolation = context.FOrdLessThan(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 0f)); + + context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(needsInterpolation, scaledInterpolatedLabel, scaledNoInterpolationLabel); + + // scale < 0.0 + context.AddLabel(scaledInterpolatedLabel); + + ApplyScalingInterpolated(context, localVector, vector, scale); + context.Branch(mergeLabel); + + // scale >= 0.0 + context.AddLabel(scaledNoInterpolationLabel); + + ApplyScalingNoInterpolation(context, localVector, vector, scale); + context.Branch(mergeLabel); + + context.AddLabel(mergeLabel); + + var passthroughLabel = context.Label(); + var finalMergeLabel = context.Label(); + + context.SelectionMerge(finalMergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(passthrough, passthroughLabel, finalMergeLabel); + + context.AddLabel(passthroughLabel); + + context.Store(localVector, vector); + context.Branch(finalMergeLabel); + + context.AddLabel(finalMergeLabel); + + return context.Load(ivector2Type, localVector); + } + else + { + var passthroughLabel = context.Label(); + var scaledLabel = context.Label(); + + context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(passthrough, passthroughLabel, scaledLabel); + + // scale == 1.0 + context.AddLabel(passthroughLabel); + + context.Store(localVector, vector); + context.Branch(mergeLabel); + + // scale != 1.0 + context.AddLabel(scaledLabel); + + ApplyScalingNoInterpolation(context, localVector, vector, scale); + 
context.Branch(mergeLabel); + + context.AddLabel(mergeLabel); + + return context.Load(ivector2Type, localVector); + } + } + + private static void ApplyScalingInterpolated(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale) + { + var vector2Type = context.TypeVector(context.TypeFP32(), 2); + + var scaleNegated = context.FNegate(context.TypeFP32(), scale); + var scaleVector = context.CompositeConstruct(vector2Type, scaleNegated, scaleNegated); + + var vectorFloat = context.ConvertSToF(vector2Type, vector); + var vectorScaled = context.VectorTimesScalar(vector2Type, vectorFloat, scaleNegated); + + var fragCoordPointer = context.Inputs[new IoDefinition(StorageKind.Input, IoVariable.FragmentCoord)]; + var fragCoord = context.Load(context.TypeVector(context.TypeFP32(), 4), fragCoordPointer); + var fragCoordXY = context.VectorShuffle(vector2Type, fragCoord, fragCoord, 0, 1); + + var scaleMod = context.FMod(vector2Type, fragCoordXY, scaleVector); + var vectorInterpolated = context.FAdd(vector2Type, vectorScaled, scaleMod); + + context.Store(output, context.ConvertFToS(context.TypeVector(context.TypeS32(), 2), vectorInterpolated)); + } + + private static void ApplyScalingNoInterpolation(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale) + { + if (context.Config.Stage == ShaderStage.Vertex) + { + scale = context.GlslFAbs(context.TypeFP32(), scale); + } + + var vector2Type = context.TypeVector(context.TypeFP32(), 2); + + var vectorFloat = context.ConvertSToF(vector2Type, vector); + var vectorScaled = context.VectorTimesScalar(vector2Type, vectorFloat, scale); + + context.Store(output, context.ConvertFToS(context.TypeVector(context.TypeS32(), 2), vectorScaled)); + } + + public static SpvInstruction ApplyUnscaling( + CodeGenContext context, + AstTextureOperation texOp, + SpvInstruction size, + bool isBindless, + bool isIndexed) + { + if (context.Config.Stage.SupportsRenderScale() && + !isBindless 
&& + !isIndexed) + { + int index = context.Config.FindTextureDescriptorIndex(texOp); + + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32()); + var fieldIndex = context.Constant(context.TypeU32(), 4); + var scaleIndex = context.Constant(context.TypeU32(), index); + + if (context.Config.Stage == ShaderStage.Vertex) + { + var scaleCountPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32()); + var scaleCountElemPointer = context.AccessChain(scaleCountPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3)); + var scaleCount = context.Load(context.TypeS32(), scaleCountElemPointer); + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, scaleCount); + } + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, context.Constant(context.TypeU32(), 1)); + + var scaleElemPointer = context.AccessChain(pointerType, context.SupportBuffer, fieldIndex, scaleIndex); + var scale = context.GlslFAbs(context.TypeFP32(), context.Load(context.TypeFP32(), scaleElemPointer)); + + var passthrough = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f)); + + var sizeFloat = context.ConvertSToF(context.TypeFP32(), size); + var sizeUnscaled = context.FDiv(context.TypeFP32(), sizeFloat, scale); + var sizeUnscaledInt = context.ConvertFToS(context.TypeS32(), sizeUnscaled); + + return context.Select(context.TypeS32(), passthrough, size, sizeUnscaledInt); + } + + return size; + } + } +}
// ===== src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs =====
using FuncBinaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
using FuncQuaternaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
using FuncTernaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
using FuncUnaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;

namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
    /// <summary>
    /// Delegate cache for SPIR-V instruction generators. Avoids delegate allocation when passing generators as arguments.
    /// </summary>
    /// <remarks>
    /// Every field is bound once, in the constructor, to the method of the same name on a
    /// <see cref="CodeGenContext"/>. Each delegate alias takes one more instruction argument than
    /// its arity suggests (e.g. the "unary" alias takes two) — presumably the result type followed
    /// by the operands; confirm against the Spv.Generator API.
    /// </remarks>
    internal readonly struct SpirvDelegates
    {
        // Unary
        public readonly FuncUnaryInstruction GlslFAbs;
        public readonly FuncUnaryInstruction GlslSAbs;
        public readonly FuncUnaryInstruction GlslCeil;
        public readonly FuncUnaryInstruction GlslCos;
        public readonly FuncUnaryInstruction GlslExp2;
        public readonly FuncUnaryInstruction GlslFloor;
        public readonly FuncUnaryInstruction GlslLog2;
        public readonly FuncUnaryInstruction FNegate;
        public readonly FuncUnaryInstruction SNegate;
        public readonly FuncUnaryInstruction GlslInverseSqrt;
        public readonly FuncUnaryInstruction GlslRoundEven;
        public readonly FuncUnaryInstruction GlslSin;
        public readonly FuncUnaryInstruction GlslSqrt;
        public readonly FuncUnaryInstruction GlslTrunc;

        // UnaryBool
        public readonly FuncUnaryInstruction LogicalNot;

        // UnaryFP32
        public readonly FuncUnaryInstruction DPdx;
        public readonly FuncUnaryInstruction DPdy;

        // UnaryS32
        public readonly FuncUnaryInstruction BitCount;
        public readonly FuncUnaryInstruction BitReverse;
        public readonly FuncUnaryInstruction Not;

        // Compare
        public readonly FuncBinaryInstruction FOrdEqual;
        public readonly FuncBinaryInstruction IEqual;
        public readonly FuncBinaryInstruction FOrdGreaterThan;
        public readonly FuncBinaryInstruction SGreaterThan;
        public readonly FuncBinaryInstruction FOrdGreaterThanEqual;
        public readonly FuncBinaryInstruction SGreaterThanEqual;
        public readonly FuncBinaryInstruction FOrdLessThan;
        public readonly FuncBinaryInstruction SLessThan;
        public readonly FuncBinaryInstruction FOrdLessThanEqual;
        public readonly FuncBinaryInstruction SLessThanEqual;
        public readonly FuncBinaryInstruction FOrdNotEqual;
        public readonly FuncBinaryInstruction INotEqual;

        // CompareU32
        public readonly FuncBinaryInstruction UGreaterThanEqual;
        public readonly FuncBinaryInstruction UGreaterThan;
        public readonly FuncBinaryInstruction ULessThanEqual;
        public readonly FuncBinaryInstruction ULessThan;

        // Binary
        public readonly FuncBinaryInstruction FAdd;
        public readonly FuncBinaryInstruction IAdd;
        public readonly FuncBinaryInstruction FDiv;
        public readonly FuncBinaryInstruction SDiv;
        public readonly FuncBinaryInstruction GlslFMax;
        public readonly FuncBinaryInstruction GlslSMax;
        public readonly FuncBinaryInstruction GlslFMin;
        public readonly FuncBinaryInstruction GlslSMin;
        public readonly FuncBinaryInstruction FMul;
        public readonly FuncBinaryInstruction IMul;
        public readonly FuncBinaryInstruction FSub;
        public readonly FuncBinaryInstruction ISub;

        // BinaryBool
        public readonly FuncBinaryInstruction LogicalAnd;
        public readonly FuncBinaryInstruction LogicalNotEqual;
        public readonly FuncBinaryInstruction LogicalOr;

        // BinaryS32
        public readonly FuncBinaryInstruction BitwiseAnd;
        public readonly FuncBinaryInstruction BitwiseXor;
        public readonly FuncBinaryInstruction BitwiseOr;
        public readonly FuncBinaryInstruction ShiftLeftLogical;
        public readonly FuncBinaryInstruction ShiftRightArithmetic;
        public readonly FuncBinaryInstruction ShiftRightLogical;

        // BinaryU32
        public readonly FuncBinaryInstruction GlslUMax;
        public readonly FuncBinaryInstruction GlslUMin;

        // AtomicMemoryBinary
        public readonly FuncQuaternaryInstruction AtomicIAdd;
        public readonly FuncQuaternaryInstruction AtomicAnd;
        public readonly FuncQuaternaryInstruction AtomicSMin;
        public readonly FuncQuaternaryInstruction AtomicUMin;
        public readonly FuncQuaternaryInstruction AtomicSMax;
        public readonly FuncQuaternaryInstruction AtomicUMax;
        public readonly FuncQuaternaryInstruction AtomicOr;
        public readonly FuncQuaternaryInstruction AtomicExchange;
        public readonly FuncQuaternaryInstruction AtomicXor;

        // Ternary
        public readonly FuncTernaryInstruction GlslFClamp;
        public readonly FuncTernaryInstruction GlslSClamp;
        public readonly FuncTernaryInstruction GlslFma;

        // TernaryS32
        public readonly FuncTernaryInstruction BitFieldSExtract;
        public readonly FuncTernaryInstruction BitFieldUExtract;

        // TernaryU32
        public readonly FuncTernaryInstruction GlslUClamp;

        // QuaternaryS32
        public readonly FuncQuaternaryInstruction BitFieldInsert;

        /// <summary>
        /// Binds every cached delegate to the matching instruction-emitting method of <paramref name="context"/>.
        /// </summary>
        /// <param name="context">Code generation context whose methods are captured as delegates</param>
        public SpirvDelegates(CodeGenContext context)
        {
            // Unary
            GlslFAbs = context.GlslFAbs;
            GlslSAbs = context.GlslSAbs;
            GlslCeil = context.GlslCeil;
            GlslCos = context.GlslCos;
            GlslExp2 = context.GlslExp2;
            GlslFloor = context.GlslFloor;
            GlslLog2 = context.GlslLog2;
            FNegate = context.FNegate;
            SNegate = context.SNegate;
            GlslInverseSqrt = context.GlslInverseSqrt;
            GlslRoundEven = context.GlslRoundEven;
            GlslSin = context.GlslSin;
            GlslSqrt = context.GlslSqrt;
            GlslTrunc = context.GlslTrunc;

            // UnaryBool
            LogicalNot = context.LogicalNot;

            // UnaryFP32
            DPdx = context.DPdx;
            DPdy = context.DPdy;

            // UnaryS32
            BitCount = context.BitCount;
            BitReverse = context.BitReverse;
            Not = context.Not;

            // Compare
            FOrdEqual = context.FOrdEqual;
            IEqual = context.IEqual;
            FOrdGreaterThan = context.FOrdGreaterThan;
            SGreaterThan = context.SGreaterThan;
            FOrdGreaterThanEqual = context.FOrdGreaterThanEqual;
            SGreaterThanEqual = context.SGreaterThanEqual;
            FOrdLessThan = context.FOrdLessThan;
            SLessThan = context.SLessThan;
            FOrdLessThanEqual = context.FOrdLessThanEqual;
            SLessThanEqual = context.SLessThanEqual;
            FOrdNotEqual = context.FOrdNotEqual;
            INotEqual = context.INotEqual;

            // CompareU32
            UGreaterThanEqual = context.UGreaterThanEqual;
            UGreaterThan = context.UGreaterThan;
            ULessThanEqual = context.ULessThanEqual;
            ULessThan = context.ULessThan;

            // Binary
            FAdd = context.FAdd;
            IAdd = context.IAdd;
            FDiv = context.FDiv;
            SDiv = context.SDiv;
            GlslFMax = context.GlslFMax;
            GlslSMax = context.GlslSMax;
            GlslFMin = context.GlslFMin;
            GlslSMin = context.GlslSMin;
            FMul = context.FMul;
            IMul = context.IMul;
            FSub = context.FSub;
            ISub = context.ISub;

            // BinaryBool
            LogicalAnd = context.LogicalAnd;
            LogicalNotEqual = context.LogicalNotEqual;
            LogicalOr = context.LogicalOr;

            // BinaryS32
            BitwiseAnd = context.BitwiseAnd;
            BitwiseXor = context.BitwiseXor;
            BitwiseOr = context.BitwiseOr;
            ShiftLeftLogical = context.ShiftLeftLogical;
            ShiftRightArithmetic = context.ShiftRightArithmetic;
            ShiftRightLogical = context.ShiftRightLogical;

            // BinaryU32
            GlslUMax = context.GlslUMax;
            GlslUMin = context.GlslUMin;

            // AtomicMemoryBinary
            AtomicIAdd = context.AtomicIAdd;
            AtomicAnd = context.AtomicAnd;
            AtomicSMin = context.AtomicSMin;
            AtomicUMin = context.AtomicUMin;
            AtomicSMax = context.AtomicSMax;
            AtomicUMax = context.AtomicUMax;
            AtomicOr = context.AtomicOr;
            AtomicExchange = context.AtomicExchange;
            AtomicXor = context.AtomicXor;

            // Ternary
            GlslFClamp = context.GlslFClamp;
            GlslSClamp = context.GlslSClamp;
            GlslFma = context.GlslFma;

            // TernaryS32
            BitFieldSExtract = context.BitFieldSExtract;
            BitFieldUExtract = context.BitFieldUExtract;

            // TernaryU32
            GlslUClamp = context.GlslUClamp;

            // QuaternaryS32
            BitFieldInsert = context.BitFieldInsert;
        }
    }
}

// ===== src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs =====
using Ryujinx.Common;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using static Spv.Specification;

namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
    using SpvInstruction = Spv.Generator.Instruction;
    using SpvInstructionPool = Spv.Generator.GeneratorPool<Spv.Generator.Instruction>;
    using SpvLiteralInteger = Spv.Generator.LiteralInteger;
    using SpvLiteralIntegerPool = Spv.Generator.GeneratorPool<Spv.Generator.LiteralInteger>;

    /// <summary>
    /// Generates SPIR-V code from a structured shader program.
    /// </summary>
    static class SpirvGenerator
    {
        // Resource pools for Spirv generation. Note: Increase count when more threads are being used.
        private const int GeneratorPoolCount = 1;

        // These are assigned exactly once by the static constructor. They are readonly so that the
        // lock target (and the pools it guards) can never be swapped out while a thread holds the lock.
        private static readonly ObjectPool<SpvInstructionPool> InstructionPool;
        private static readonly ObjectPool<SpvLiteralIntegerPool> IntegerPool;
        private static readonly object PoolLock;

        static SpirvGenerator()
        {
            InstructionPool = new (() => new SpvInstructionPool(), GeneratorPoolCount);
            IntegerPool = new (() => new SpvLiteralIntegerPool(), GeneratorPoolCount);
            PoolLock = new object();
        }

        // Helper functions that cause the SubgroupLaneId input to be added to the program I/O definitions.
        private const HelperFunctionsMask NeedsInvocationIdMask =
            HelperFunctionsMask.Shuffle |
            HelperFunctionsMask.ShuffleDown |
            HelperFunctionsMask.ShuffleUp |
            HelperFunctionsMask.ShuffleXor |
            HelperFunctionsMask.SwizzleAdd;

        /// <summary>
        /// Generates the code for a whole shader program.
        /// </summary>
        /// <remarks>
        /// The generator pools are taken from and returned to the shared object pools under <c>PoolLock</c>.
        /// They are only returned when generation completes without throwing.
        /// Note that <paramref name="info"/> is modified: a SubgroupLaneId input definition is added
        /// when any of the helper functions in <c>NeedsInvocationIdMask</c> is used.
        /// </remarks>
        /// <param name="info">Structured program to generate code for</param>
        /// <param name="config">Shader configuration (stage, host capabilities, options)</param>
        /// <returns>The byte array produced by the code generation context</returns>
        public static byte[] Generate(StructuredProgramInfo info, ShaderConfig config)
        {
            SpvInstructionPool instPool;
            SpvLiteralIntegerPool integerPool;

            lock (PoolLock)
            {
                instPool = InstructionPool.Allocate();
                integerPool = IntegerPool.Allocate();
            }

            CodeGenContext context = new CodeGenContext(info, config, instPool, integerPool);

            // Capabilities that are declared for every shader, regardless of stage.
            context.AddCapability(Capability.GroupNonUniformBallot);
            context.AddCapability(Capability.GroupNonUniformShuffle);
            context.AddCapability(Capability.GroupNonUniformVote);
            context.AddCapability(Capability.ImageBuffer);
            context.AddCapability(Capability.ImageGatherExtended);
            context.AddCapability(Capability.ImageQuery);
            context.AddCapability(Capability.SampledBuffer);

            if (config.TransformFeedbackEnabled && config.LastInVertexPipeline)
            {
                context.AddCapability(Capability.TransformFeedback);
            }

            // Stage specific capabilities and extensions.
            if (config.Stage == ShaderStage.Fragment)
            {
                if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Input, IoVariable.Layer)))
                {
                    context.AddCapability(Capability.Geometry);
                }

                if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
                {
                    context.AddCapability(Capability.FragmentShaderPixelInterlockEXT);
                    context.AddExtension("SPV_EXT_fragment_shader_interlock");
                }
            }
            else if (config.Stage == ShaderStage.Geometry)
            {
                context.AddCapability(Capability.Geometry);

                if (config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
                {
                    context.AddExtension("SPV_NV_geometry_shader_passthrough");
                    context.AddCapability(Capability.GeometryShaderPassthroughNV);
                }
            }
            else if (config.Stage == ShaderStage.TessellationControl || config.Stage == ShaderStage.TessellationEvaluation)
            {
                context.AddCapability(Capability.Tessellation);
            }
            else if (config.Stage == ShaderStage.Vertex)
            {
                context.AddCapability(Capability.DrawParameters);
            }

            if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Output, IoVariable.ViewportMask)))
            {
                context.AddExtension("SPV_NV_viewport_array2");
                context.AddCapability(Capability.ShaderViewportMaskNV);
            }

            if ((info.HelperFunctionsMask & NeedsInvocationIdMask) != 0)
            {
                info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.SubgroupLaneId));
            }

            Declarations.DeclareAll(context, info);

            // First pass: declare every function, so that all of them are known to the context
            // before any function body is generated by the second pass.
            for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++)
            {
                var function = info.Functions[funcIndex];
                var retType = context.GetType(function.ReturnType);

                var funcArgs = new SpvInstruction[function.InArguments.Length + function.OutArguments.Length];

                // All arguments (inputs and outputs alike) are passed as Function storage class pointers.
                for (int argIndex = 0; argIndex < funcArgs.Length; argIndex++)
                {
                    var argType = context.GetType(function.GetArgumentType(argIndex));
                    var argPointerType = context.TypePointer(StorageClass.Function, argType);
                    funcArgs[argIndex] = argPointerType;
                }

                var funcType = context.TypeFunction(retType, false, funcArgs);
                var spvFunc = context.Function(retType, FunctionControlMask.MaskNone, funcType);

                context.DeclareFunction(funcIndex, function, spvFunc);
            }

            // Second pass: generate the function bodies.
            for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++)
            {
                Generate(context, info, funcIndex);
            }

            byte[] result = context.Generate();

            lock (PoolLock)
            {
                InstructionPool.Release(instPool);
                IntegerPool.Release(integerPool);
            }

            return result;
        }

        /// <summary>
        /// Generates the body of a single function. For the function at index 0, also adds the
        /// "main" entry point and the execution modes required by the shader stage.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="info">Structured program that contains the function</param>
        /// <param name="funcIndex">Index of the function on <paramref name="info"/></param>
        /// <exception cref="InvalidOperationException">
        /// The geometry shader input or output topology is not one of the handled values
        /// </exception>
        private static void Generate(CodeGenContext context, StructuredProgramInfo info, int funcIndex)
        {
            var function = info.Functions[funcIndex];

            (_, var spvFunc) = context.GetFunction(funcIndex);

            context.AddFunction(spvFunc);
            context.StartFunction();

            Declarations.DeclareParameters(context, function);

            context.EnterBlock(function.MainBlock);

            Declarations.DeclareLocals(context, function);
            Declarations.DeclareLocalForArgs(context, info.Functions);

            Generate(context, function.MainBlock);

            // Functions must always end with a return.
            if (!(function.MainBlock.Last is AstOperation operation) ||
                (operation.Inst != Instruction.Return && operation.Inst != Instruction.Discard))
            {
                context.Return();
            }

            context.FunctionEnd();

            // Function 0 is the one exposed as the shader entry point.
            if (funcIndex == 0)
            {
                context.AddEntryPoint(context.Config.Stage.Convert(), spvFunc, "main", context.GetMainInterface());

                if (context.Config.Stage == ShaderStage.TessellationControl)
                {
                    context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive);
                }
                else if (context.Config.Stage == ShaderStage.TessellationEvaluation)
                {
                    // Values other than the ones below add no execution mode.
                    switch (context.Config.GpuAccessor.QueryTessPatchType())
                    {
                        case TessPatchType.Isolines:
                            context.AddExecutionMode(spvFunc, ExecutionMode.Isolines);
                            break;
                        case TessPatchType.Triangles:
                            context.AddExecutionMode(spvFunc, ExecutionMode.Triangles);
                            break;
                        case TessPatchType.Quads:
                            context.AddExecutionMode(spvFunc, ExecutionMode.Quads);
                            break;
                    }

                    switch (context.Config.GpuAccessor.QueryTessSpacing())
                    {
                        case TessSpacing.EqualSpacing:
                            context.AddExecutionMode(spvFunc, ExecutionMode.SpacingEqual);
                            break;
                        case TessSpacing.FractionalEventSpacing:
                            context.AddExecutionMode(spvFunc, ExecutionMode.SpacingFractionalEven);
                            break;
                        case TessSpacing.FractionalOddSpacing:
                            context.AddExecutionMode(spvFunc, ExecutionMode.SpacingFractionalOdd);
                            break;
                    }

                    bool tessCw = context.Config.GpuAccessor.QueryTessCw();

                    if (context.Config.Options.TargetApi == TargetApi.Vulkan)
                    {
                        // We invert the front face on Vulkan backend, so we need to do that here as well.
                        tessCw = !tessCw;
                    }

                    if (tessCw)
                    {
                        context.AddExecutionMode(spvFunc, ExecutionMode.VertexOrderCw);
                    }
                    else
                    {
                        context.AddExecutionMode(spvFunc, ExecutionMode.VertexOrderCcw);
                    }
                }
                else if (context.Config.Stage == ShaderStage.Geometry)
                {
                    InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology();

                    context.AddExecutionMode(spvFunc, inputTopology switch
                    {
                        InputTopology.Points => ExecutionMode.InputPoints,
                        InputTopology.Lines => ExecutionMode.InputLines,
                        InputTopology.LinesAdjacency => ExecutionMode.InputLinesAdjacency,
                        InputTopology.Triangles => ExecutionMode.Triangles,
                        InputTopology.TrianglesAdjacency => ExecutionMode.InputTrianglesAdjacency,
                        _ => throw new InvalidOperationException($"Invalid input topology \"{inputTopology}\".")
                    });

                    context.AddExecutionMode(spvFunc, ExecutionMode.Invocations, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive);

                    context.AddExecutionMode(spvFunc, context.Config.OutputTopology switch
                    {
                        OutputTopology.PointList => ExecutionMode.OutputPoints,
                        OutputTopology.LineStrip => ExecutionMode.OutputLineStrip,
                        OutputTopology.TriangleStrip => ExecutionMode.OutputTriangleStrip,
                        _ => throw new InvalidOperationException($"Invalid output topology \"{context.Config.OutputTopology}\".")
                    });

                    // With passthrough enabled, the output vertex count is the input vertex count.
                    int maxOutputVertices = context.Config.GpPassthrough ? context.InputVertices : context.Config.MaxOutputVertices;

                    context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)maxOutputVertices);
                }
                else if (context.Config.Stage == ShaderStage.Fragment)
                {
                    context.AddExecutionMode(spvFunc, context.Config.Options.TargetApi == TargetApi.Vulkan
                        ? ExecutionMode.OriginUpperLeft
                        : ExecutionMode.OriginLowerLeft);

                    if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Output, IoVariable.FragmentOutputDepth)))
                    {
                        context.AddExecutionMode(spvFunc, ExecutionMode.DepthReplacing);
                    }

                    if (context.Config.GpuAccessor.QueryEarlyZForce())
                    {
                        context.AddExecutionMode(spvFunc, ExecutionMode.EarlyFragmentTests);
                    }

                    if ((info.HelperFunctionsMask & HelperFunctionsMask.FSI) != 0 &&
                        context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
                    {
                        context.AddExecutionMode(spvFunc, ExecutionMode.PixelInterlockOrderedEXT);
                    }
                }
                else if (context.Config.Stage == ShaderStage.Compute)
                {
                    var localSizeX = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeX();
                    var localSizeY = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeY();
                    var localSizeZ = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeZ();

                    context.AddExecutionMode(
                        spvFunc,
                        ExecutionMode.LocalSize,
                        localSizeX,
                        localSizeY,
                        localSizeZ);
                }

                if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline)
                {
                    context.AddExecutionMode(spvFunc, ExecutionMode.Xfb);
                }
            }
        }

        /// <summary>
        /// Generates code for all nodes of a block (and its nested blocks), emitting the structured
        /// control flow (selection/loop merges and branches) as blocks are entered and left.
        /// </summary>
        /// <param name="context">Code generation context</param>
        /// <param name="block">Root block to generate code for</param>
        /// <exception cref="NotImplementedException">
        /// An assignment destination is neither a local variable nor an argument
        /// </exception>
        private static void Generate(CodeGenContext context, AstBlock block)
        {
            AstBlockVisitor visitor = new AstBlockVisitor(block);

            // Maps each do-while block to its (loop target, continue target) labels.
            var loopTargets = new Dictionary<AstBlock, (SpvInstruction, SpvInstruction)>();

            context.LoopTargets = loopTargets;

            visitor.BlockEntered += (sender, e) =>
            {
                AstBlock mergeBlock = e.Block.Parent;

                if (e.Block.Type == AstBlockType.If)
                {
                    AstBlock ifTrueBlock = e.Block;
                    AstBlock ifFalseBlock;

                    // Without an else block, a false condition jumps straight to the merge point.
                    if (AstHelper.Next(e.Block) is AstBlock nextBlock && nextBlock.Type == AstBlockType.Else)
                    {
                        ifFalseBlock = nextBlock;
                    }
                    else
                    {
                        ifFalseBlock = mergeBlock;
                    }

                    var condition = context.Get(AggregateType.Bool, e.Block.Condition);

                    context.SelectionMerge(context.GetNextLabel(mergeBlock), SelectionControlMask.MaskNone);
                    context.BranchConditional(condition, context.GetNextLabel(ifTrueBlock), context.GetNextLabel(ifFalseBlock));
                }
                else if (e.Block.Type == AstBlockType.DoWhile)
                {
                    var continueTarget = context.Label();

                    loopTargets.Add(e.Block, (context.NewBlock(), continueTarget));

                    context.LoopMerge(context.GetNextLabel(mergeBlock), continueTarget, LoopControlMask.MaskNone);
                    context.Branch(context.GetFirstLabel(e.Block));
                }

                context.EnterBlock(e.Block);
            };

            visitor.BlockLeft += (sender, e) =>
            {
                if (e.Block.Parent != null)
                {
                    if (e.Block.Type == AstBlockType.DoWhile)
                    {
                        // This is a loop, we need to jump back to the loop header
                        // if the condition is true.
                        AstBlock mergeBlock = e.Block.Parent;

                        (var loopTarget, var continueTarget) = loopTargets[e.Block];

                        context.Branch(continueTarget);
                        context.AddLabel(continueTarget);

                        var condition = context.Get(AggregateType.Bool, e.Block.Condition);

                        context.BranchConditional(condition, loopTarget, context.GetNextLabel(mergeBlock));
                    }
                    else
                    {
                        // We only need a branch if the last instruction didn't
                        // already cause the program to exit or jump elsewhere.
                        bool lastIsCf = e.Block.Last is AstOperation lastOp &&
                            (lastOp.Inst == Instruction.Discard ||
                             lastOp.Inst == Instruction.LoopBreak ||
                             lastOp.Inst == Instruction.LoopContinue ||
                             lastOp.Inst == Instruction.Return);

                        if (!lastIsCf)
                        {
                            context.Branch(context.GetNextLabel(e.Block.Parent));
                        }
                    }

                    bool hasElse = AstHelper.Next(e.Block) is AstBlock nextBlock &&
                        (nextBlock.Type == AstBlockType.Else ||
                         nextBlock.Type == AstBlockType.ElseIf);

                    // Re-enter the parent block.
                    if (e.Block.Parent != null && !hasElse)
                    {
                        context.EnterBlock(e.Block.Parent);
                    }
                }
            };

            foreach (IAstNode node in visitor.Visit())
            {
                if (node is AstAssignment assignment)
                {
                    var dest = (AstOperand)assignment.Destination;

                    if (dest.Type == OperandType.LocalVariable)
                    {
                        var source = context.Get(dest.VarType, assignment.Source);
                        context.Store(context.GetLocalPointer(dest), source);
                    }
                    else if (dest.Type == OperandType.Argument)
                    {
                        var source = context.Get(dest.VarType, assignment.Source);
                        context.Store(context.GetArgumentPointer(dest), source);
                    }
                    else
                    {
                        throw new NotImplementedException(dest.Type.ToString());
                    }
                }
                else if (node is AstOperation operation)
                {
                    Instructions.Generate(context, operation);
                }
            }
        }
    }
}

// ===== src/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs =====
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
{
    /// <summary>
    /// Immutable (constant buffer slot, handle, format) triple describing a texture.
    /// Being a record struct, two values are equal when all three members are equal.
    /// </summary>
    readonly record struct TextureMeta(int CbufSlot, int Handle, TextureFormat Format);
}
// ===== src/Ryujinx.Graphics.Shader/Constants.cs =====
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Compile-time constants shared by the shader translator.
    /// </summary>
    static class Constants
    {
        /// <summary>Size of a constant buffer, in bytes (64 KiB).</summary>
        public const int ConstantBufferSize = 64 * 1024;

        /// <summary>Maximum number of attributes.</summary>
        public const int MaxAttributes = 16;

        /// <summary>Bit mask with the <see cref="MaxAttributes"/> lowest bits set.</summary>
        public const int AllAttributesMask = (1 << MaxAttributes) - 1;

        // Byte offsets of the NVN draw parameters.
        // NOTE(review): presumably offsets into a driver reserved constant buffer — confirm against the users of these constants.
        public const int NvnBaseVertexByteOffset = 0x640;
        public const int NvnBaseInstanceByteOffset = NvnBaseVertexByteOffset + sizeof(int);
        public const int NvnDrawIndexByteOffset = NvnBaseInstanceByteOffset + sizeof(int);

        /// <summary>Storage alignment. NOTE(review): presumably in bytes — confirm.</summary>
        public const int StorageAlignment = 16;
    }
}
// ===== src/Ryujinx.Graphics.Shader/Decoders/Block.cs =====
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Linq;

namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// A push instruction together with the blocks that consume the address it pushed.
    /// </summary>
    class PushOpInfo
    {
        /// <summary>The push instruction itself.</summary>
        public InstOp Op { get; }

        /// <summary>Consumer blocks, each mapped to the operand associated with it.</summary>
        public Dictionary<Block, Operand> Consumers;

        /// <summary>
        /// Creates the information for a push instruction, with no consumers.
        /// </summary>
        /// <param name="op">The push instruction</param>
        public PushOpInfo(InstOp op)
        {
            Consumers = new Dictionary<Block, Operand>();
            Op = op;
        }
    }

    /// <summary>
    /// Pairs a push instruction with an integer ID.
    /// </summary>
    readonly struct SyncTarget
    {
        public PushOpInfo PushOpInfo { get; }
        public int PushOpId { get; }

        /// <summary>
        /// Creates a new sync target.
        /// </summary>
        /// <param name="pushOpInfo">Push instruction information</param>
        /// <param name="pushOpId">ID of the push instruction</param>
        public SyncTarget(PushOpInfo pushOpInfo, int pushOpId)
        {
            PushOpId = pushOpId;
            PushOpInfo = pushOpInfo;
        }
    }

    /// <summary>
    /// A run of decoded instructions covering the addresses from <see cref="Address"/> up to
    /// (but not including) <see cref="EndAddress"/>, linked to its predecessor and successor blocks.
    /// </summary>
    class Block
    {
        public ulong Address { get; set; }
        public ulong EndAddress { get; set; }

        public List<Block> Predecessors { get; } = new();
        public List<Block> Successors { get; } = new();

        public List<InstOp> OpCodes { get; } = new();
        public List<PushOpInfo> PushOpCodes { get; } = new();
        public Dictionary<ulong, SyncTarget> SyncTargets { get; } = new();

        /// <summary>
        /// Creates an empty block.
        /// </summary>
        /// <param name="address">Address where the block starts</param>
        public Block(ulong address)
        {
            Address = address;
        }

        /// <summary>
        /// Splits this block in two at the address of <paramref name="rightBlock"/>.
        /// This block keeps the instructions before that address. The instructions from that address
        /// onwards, the successors, the sync targets and the push instructions located at or after
        /// the address are all moved to <paramref name="rightBlock"/>, which becomes the only
        /// successor of this block.
        /// </summary>
        /// <param name="rightBlock">Block that receives the second half of this block</param>
        /// <exception cref="ArgumentException">
        /// No instruction of this block is located at or after the address of <paramref name="rightBlock"/>
        /// </exception>
        public void Split(Block rightBlock)
        {
            // The search may land on the instruction right before the split address, step over it if so.
            int firstMoved = BinarySearch(OpCodes, rightBlock.Address);

            if (OpCodes[firstMoved].Address < rightBlock.Address)
            {
                firstMoved++;
            }

            int movedCount = OpCodes.Count - firstMoved;

            if (movedCount <= 0)
            {
                throw new ArgumentException("Can't split at right block address.");
            }

            // The right half inherits the end of the range and the outgoing edges.
            rightBlock.EndAddress = EndAddress;
            rightBlock.Successors.AddRange(Successors);
            rightBlock.Predecessors.Add(this);

            // This block now ends where the right one starts, and falls through to it.
            EndAddress = rightBlock.Address;

            Successors.Clear();
            Successors.Add(rightBlock);

            // Hand over the instructions.
            rightBlock.OpCodes.AddRange(OpCodes.GetRange(firstMoved, movedCount));
            OpCodes.RemoveRange(firstMoved, movedCount);

            // Push consumers that point to this block must point to the right block from now on.
            foreach (SyncTarget target in SyncTargets.Values)
            {
                Dictionary<Block, Operand> consumers = target.PushOpInfo.Consumers;

                Operand local = consumers[this];
                consumers.Remove(this);
                consumers.Add(rightBlock, local);
            }

            foreach (KeyValuePair<ulong, SyncTarget> entry in SyncTargets)
            {
                rightBlock.SyncTargets.Add(entry.Key, entry.Value);
            }

            SyncTargets.Clear();

            // Hand over the push instructions, starting from the first one inside the right block.
            int firstMovedPush = PushOpCodes.FindIndex(push => push.Op.Address >= rightBlock.Address);

            if (firstMovedPush >= 0)
            {
                int movedPushCount = PushOpCodes.Count - firstMovedPush;

                rightBlock.PushOpCodes.AddRange(PushOpCodes.Skip(firstMovedPush));
                PushOpCodes.RemoveRange(firstMovedPush, movedPushCount);
            }
        }

        /// <summary>
        /// Searches an address-sorted instruction list for a given address.
        /// </summary>
        /// <param name="opCodes">Instructions, sorted by address</param>
        /// <param name="address">Address to look for</param>
        /// <returns>
        /// Index of the instruction at <paramref name="address"/> if there is one, otherwise the
        /// last index that was probed (0 for an empty list)
        /// </returns>
        private static int BinarySearch(List<InstOp> opCodes, ulong address)
        {
            int low = 0;
            int high = opCodes.Count - 1;
            int probe = 0;

            while (low <= high)
            {
                probe = low + ((high - low) >> 1);

                ulong probeAddress = opCodes[probe].Address;

                if (probeAddress == address)
                {
                    return probe;
                }

                if (address < probeAddress)
                {
                    high = probe - 1;
                }
                else
                {
                    low = probe + 1;
                }
            }

            return probe;
        }

        /// <summary>
        /// Gets the last instruction of the block.
        /// </summary>
        /// <returns>The last instruction, or the default value if the block has no instructions</returns>
        public InstOp GetLastOp()
        {
            return OpCodes.Count != 0 ? OpCodes[^1] : default;
        }

        /// <summary>
        /// Checks if execution can continue past the end of this block.
        /// </summary>
        /// <returns>
        /// True if the block has instructions and the last one is not an unconditional branch
        /// </returns>
        public bool HasNext()
        {
            if (OpCodes.Count == 0)
            {
                return false;
            }

            InstOp lastOp = OpCodes[^1];

            return !Decoder.IsUnconditionalBranch(ref lastOp);
        }

        /// <summary>
        /// Registers a push instruction on this block.
        /// </summary>
        /// <param name="op">The push instruction</param>
        public void AddPushOp(InstOp op)
        {
            PushOpInfo pushOpInfo = new PushOpInfo(op);

            PushOpCodes.Add(pushOpInfo);
        }
    }
}
// ===== src/Ryujinx.Graphics.Shader/Decoders/DecodedFunction.cs =====
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// A function found while decoding a shader, along with its blocks and the set of functions calling it.
    /// </summary>
    class DecodedFunction
    {
        private readonly HashSet<DecodedFunction> _callers = new();

        /// <summary>True for every function type other than <see cref="FunctionType.User"/>.</summary>
        public bool IsCompilerGenerated => Type != FunctionType.User;

        /// <summary>Function type, <see cref="FunctionType.User"/> for new functions.</summary>
        public FunctionType Type { get; set; } = FunctionType.User;

        /// <summary>Function ID, -1 for new functions.</summary>
        public int Id { get; set; } = -1;

        /// <summary>Address of the function.</summary>
        public ulong Address { get; }

        /// <summary>Blocks of the function, null until <see cref="SetBlocks"/> is called.</summary>
        public Block[] Blocks { get; private set; }

        /// <summary>
        /// Creates a new function, with no callers and no blocks.
        /// </summary>
        /// <param name="address">Address of the function</param>
        public DecodedFunction(ulong address)
        {
            Address = address;
        }

        /// <summary>
        /// Sets the blocks of the function. This can only be done once.
        /// </summary>
        /// <param name="blocks">Blocks of the function</param>
        /// <exception cref="InvalidOperationException">The blocks have been set before</exception>
        public void SetBlocks(Block[] blocks)
        {
            Blocks = Blocks is null
                ? blocks
                : throw new InvalidOperationException("Blocks have already been set.");
        }

        /// <summary>
        /// Registers a function that calls this one.
        /// </summary>
        /// <param name="caller">The calling function</param>
        public void AddCaller(DecodedFunction caller) => _callers.Add(caller);

        /// <summary>
        /// Unregisters a function that calls this one. The function is marked as
        /// <see cref="FunctionType.Unused"/> when its last registered caller is removed.
        /// </summary>
        /// <param name="caller">The function that no longer calls this one</param>
        public void RemoveCaller(DecodedFunction caller)
        {
            // Nothing changes unless the caller was actually registered.
            if (!_callers.Remove(caller))
            {
                return;
            }

            if (_callers.Count == 0)
            {
                Type = FunctionType.Unused;
            }
        }
    }
}
// ===== src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs =====
using System;
using System.Collections;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// The functions of a decoded shader program. Enumerating it yields all the functions of the
    /// address lookup table.
    /// </summary>
    readonly struct DecodedProgram : IEnumerable<DecodedFunction>
    {
        public DecodedFunction MainFunction { get; }
        private readonly IReadOnlyDictionary<ulong, DecodedFunction> _functions;
        private readonly List<DecodedFunction> _functionsWithId;

        /// <summary>Number of functions that were given an ID by <see cref="AddFunctionAndSetId"/>.</summary>
        public int FunctionsWithIdCount => _functionsWithId.Count;

        /// <summary>
        /// Creates a new decoded program, where no function has an ID assigned yet.
        /// </summary>
        /// <param name="mainFunction">Main function of the program</param>
        /// <param name="functions">All functions of the program, keyed by address</param>
        public DecodedProgram(DecodedFunction mainFunction, IReadOnlyDictionary<ulong, DecodedFunction> functions)
        {
            _functionsWithId = new List<DecodedFunction>();
            _functions = functions;
            MainFunction = mainFunction;
        }

        /// <summary>
        /// Gets the function located at a given address.
        /// </summary>
        /// <param name="address">Address of the function</param>
        /// <returns>The function, or null if there is no function at <paramref name="address"/></returns>
        public DecodedFunction GetFunctionByAddress(ulong address)
        {
            return _functions.TryGetValue(address, out DecodedFunction function) ? function : null;
        }

        /// <summary>
        /// Gets a function from the ID assigned by <see cref="AddFunctionAndSetId"/>.
        /// </summary>
        /// <param name="id">ID of the function</param>
        /// <returns>The function with the given ID</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="id"/> is negative, or no function was assigned that ID
        /// </exception>
        public DecodedFunction GetFunctionById(int id)
        {
            if (id < 0 || id >= _functionsWithId.Count)
            {
                throw new ArgumentOutOfRangeException(nameof(id));
            }

            return _functionsWithId[id];
        }

        /// <summary>
        /// Assigns the next sequential ID (starting at 0) to a function, and registers it for lookup by ID.
        /// </summary>
        /// <param name="function">Function to assign an ID to</param>
        public void AddFunctionAndSetId(DecodedFunction function)
        {
            // The ID doubles as the index of the function on the list.
            int nextId = _functionsWithId.Count;

            function.Id = nextId;
            _functionsWithId.Add(function);
        }

        /// <summary>
        /// Gets an enumerator over all the functions of the address lookup table.
        /// </summary>
        public IEnumerator<DecodedFunction> GetEnumerator() => _functions.Values.GetEnumerator();

        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs new file mode 100644 index 00000000..c619b9bb --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -0,0 +1,765 @@ +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Decoders +{ + static class Decoder + { + public static DecodedProgram Decode(ShaderConfig config, ulong startAddress) + { + Queue<DecodedFunction> functionsQueue = new Queue<DecodedFunction>(); + Dictionary<ulong, DecodedFunction> functionsVisited = new Dictionary<ulong, DecodedFunction>(); + + DecodedFunction EnqueueFunction(ulong address) + { + if (!functionsVisited.TryGetValue(address, out DecodedFunction function)) + { + functionsVisited.Add(address, function = new DecodedFunction(address)); + functionsQueue.Enqueue(function); + } + + return function; + } + + DecodedFunction mainFunction = EnqueueFunction(0); + + while (functionsQueue.TryDequeue(out DecodedFunction currentFunction)) + { + List<Block> blocks = new List<Block>(); + Queue<Block> workQueue = new Queue<Block>(); + Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>(); + + Block GetBlock(ulong blkAddress) + { + if (!visited.TryGetValue(blkAddress, out Block block)) + { + block = new Block(blkAddress); + + workQueue.Enqueue(block); + visited.Add(blkAddress, block); + } + + return block; + } + + GetBlock(currentFunction.Address); + + bool hasNewTarget; + + do + { + while (workQueue.TryDequeue(out Block currBlock)) + { + // Check if the current block is inside another block. 
+ if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex)) + { + Block nBlock = blocks[nBlkIndex]; + + if (nBlock.Address == currBlock.Address) + { + throw new InvalidOperationException("Found duplicate block address on the list."); + } + + nBlock.Split(currBlock); + blocks.Insert(nBlkIndex + 1, currBlock); + + continue; + } + + // If we have a block after the current one, set the limit address. + ulong limitAddress = ulong.MaxValue; + + if (nBlkIndex != blocks.Count) + { + Block nBlock = blocks[nBlkIndex]; + + int nextIndex = nBlkIndex + 1; + + if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count) + { + limitAddress = blocks[nextIndex].Address; + } + else if (nBlock.Address > currBlock.Address) + { + limitAddress = blocks[nBlkIndex].Address; + } + } + + FillBlock(config, currBlock, limitAddress, startAddress); + + if (currBlock.OpCodes.Count != 0) + { + // We should have blocks for all possible branch targets, + // including those from PBK/PCNT/SSY instructions. + foreach (PushOpInfo pushOp in currBlock.PushOpCodes) + { + GetBlock(pushOp.Op.GetAbsoluteAddress()); + } + + // Set child blocks. "Branch" is the block the branch instruction + // points to (when taken), "Next" is the block at the next address, + // executed when the branch is not taken. For Unconditional Branches + // or end of program, Next is null. + InstOp lastOp = currBlock.GetLastOp(); + + if (lastOp.Name == InstName.Cal) + { + EnqueueFunction(lastOp.GetAbsoluteAddress()).AddCaller(currentFunction); + } + else if (lastOp.Name == InstName.Bra) + { + Block succBlock = GetBlock(lastOp.GetAbsoluteAddress()); + currBlock.Successors.Add(succBlock); + succBlock.Predecessors.Add(currBlock); + } + + if (!IsUnconditionalBranch(ref lastOp)) + { + Block succBlock = GetBlock(currBlock.EndAddress); + currBlock.Successors.Insert(0, succBlock); + succBlock.Predecessors.Add(currBlock); + } + } + + // Insert the new block on the list (sorted by address). 
+ if (blocks.Count != 0) + { + Block nBlock = blocks[nBlkIndex]; + + blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock); + } + else + { + blocks.Add(currBlock); + } + } + + // Propagate SSY/PBK addresses into their uses (SYNC/BRK). + foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0)) + { + for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++) + { + PropagatePushOp(visited, block, pushOpIndex); + } + } + + // Try to find targets for BRX (indirect branch) instructions. + hasNewTarget = FindBrxTargets(config, blocks, GetBlock); + + // If we discovered new branch targets from the BRX instruction, + // we need another round of decoding to decode the new blocks. + // Additionally, we may have more SSY/PBK targets to propagate, + // and new BRX instructions. + } + while (hasNewTarget); + + currentFunction.SetBlocks(blocks.ToArray()); + } + + return new DecodedProgram(mainFunction, functionsVisited); + } + + private static bool BinarySearch(List<Block> blocks, ulong address, out int index) + { + index = 0; + + int left = 0; + int right = blocks.Count - 1; + + while (left <= right) + { + int size = right - left; + + int middle = left + (size >> 1); + + Block block = blocks[middle]; + + index = middle; + + if (address >= block.Address && address < block.EndAddress) + { + return true; + } + + if (address < block.Address) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return false; + } + + private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress) + { + IGpuAccessor gpuAccessor = config.GpuAccessor; + + ulong address = block.Address; + int bufferOffset = 0; + ReadOnlySpan<ulong> buffer = ReadOnlySpan<ulong>.Empty; + + InstOp op = default; + + do + { + if (address + 7 >= limitAddress) + { + break; + } + + // Ignore scheduling instructions, which are written every 32 bytes. 
+ if ((address & 0x1f) == 0) + { + address += 8; + bufferOffset++; + continue; + } + + if (bufferOffset >= buffer.Length) + { + buffer = gpuAccessor.GetCode(startAddress + address, 8); + bufferOffset = 0; + } + + ulong opCode = buffer[bufferOffset++]; + + op = InstTable.GetOp(address, opCode); + + if (op.Props.HasFlag(InstProps.TexB)) + { + config.SetUsedFeature(FeatureFlags.Bindless); + } + + if (op.Name == InstName.Ald || op.Name == InstName.Ast || op.Name == InstName.Ipa) + { + SetUserAttributeUses(config, op.Name, opCode); + } + else if (op.Name == InstName.Pbk || op.Name == InstName.Pcnt || op.Name == InstName.Ssy) + { + block.AddPushOp(op); + } + + block.OpCodes.Add(op); + + address += 8; + } + while (!op.Props.HasFlag(InstProps.Bra)); + + block.EndAddress = address; + } + + private static void SetUserAttributeUses(ShaderConfig config, InstName name, ulong opCode) + { + int offset; + int count = 1; + bool isStore = false; + bool indexed = false; + bool perPatch = false; + + if (name == InstName.Ast) + { + InstAst opAst = new InstAst(opCode); + count = (int)opAst.AlSize + 1; + offset = opAst.Imm11; + indexed = opAst.Phys; + perPatch = opAst.P; + isStore = true; + } + else if (name == InstName.Ald) + { + InstAld opAld = new InstAld(opCode); + count = (int)opAld.AlSize + 1; + offset = opAld.Imm11; + indexed = opAld.Phys; + perPatch = opAld.P; + isStore = opAld.O; + } + else /* if (name == InstName.Ipa) */ + { + InstIpa opIpa = new InstIpa(opCode); + offset = opIpa.Imm10; + indexed = opIpa.Idx; + } + + if (indexed) + { + if (isStore) + { + config.SetAllOutputUserAttributes(); + config.SetUsedFeature(FeatureFlags.OaIndexing); + } + else + { + config.SetAllInputUserAttributes(); + config.SetUsedFeature(FeatureFlags.IaIndexing); + } + } + else + { + for (int elemIndex = 0; elemIndex < count; elemIndex++) + { + int attr = offset + elemIndex * 4; + + if (perPatch) + { + if (attr >= AttributeConsts.UserAttributePerPatchBase && attr < 
AttributeConsts.UserAttributePerPatchEnd) + { + int userAttr = attr - AttributeConsts.UserAttributePerPatchBase; + int index = userAttr / 16; + + if (isStore) + { + config.SetOutputUserAttributePerPatch(index); + } + else + { + config.SetInputUserAttributePerPatch(index); + } + } + } + else if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd) + { + int userAttr = attr - AttributeConsts.UserAttributeBase; + int index = userAttr / 16; + + if (isStore) + { + config.SetOutputUserAttribute(index); + } + else + { + config.SetInputUserAttribute(index, (userAttr >> 2) & 3); + } + } + + if (!isStore && + (attr == AttributeConsts.FogCoord || + (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0) || + (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd))) + { + config.SetUsedFeature(FeatureFlags.FixedFuncAttr); + } + } + } + } + + public static bool IsUnconditionalBranch(ref InstOp op) + { + return IsUnconditional(ref op) && op.Props.HasFlag(InstProps.Bra); + } + + private static bool IsUnconditional(ref InstOp op) + { + InstConditional condOp = new InstConditional(op.RawOpCode); + + if ((op.Name == InstName.Bra || op.Name == InstName.Exit) && condOp.Ccc != Ccc.T) + { + return false; + } + + return condOp.Pred == RegisterConsts.PredicateTrueIndex && !condOp.PredInv; + } + + private static bool FindBrxTargets(ShaderConfig config, IEnumerable<Block> blocks, Func<ulong, Block> getBlock) + { + bool hasNewTarget = false; + + foreach (Block block in blocks) + { + InstOp lastOp = block.GetLastOp(); + bool hasNext = block.HasNext(); + + if (lastOp.Name == InstName.Brx && block.Successors.Count == (hasNext ? 1 : 0)) + { + HashSet<ulong> visited = new HashSet<ulong>(); + + InstBrx opBrx = new InstBrx(lastOp.RawOpCode); + ulong baseOffset = lastOp.GetAbsoluteAddress(); + + // An indirect branch could go anywhere, + // try to get the possible target offsets from the constant buffer. 
+ (int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrx.SrcA); + + if (cbOffsetsCount != 0) + { + hasNewTarget = true; + } + + for (int i = 0; i < cbOffsetsCount; i++) + { + uint targetOffset = config.ConstantBuffer1Read(cbBaseOffset + i * 4); + ulong targetAddress = baseOffset + targetOffset; + + if (visited.Add(targetAddress)) + { + Block target = getBlock(targetAddress); + target.Predecessors.Add(block); + block.Successors.Add(target); + } + } + } + } + + return hasNewTarget; + } + + private static (int, int) FindBrxTargetRange(Block block, int brxReg) + { + // Try to match the following pattern: + // + // IMNMX.U32 Rx, Rx, UpperBound, PT + // SHL Rx, Rx, 0x2 + // LDC Rx, c[0x1][Rx+BaseOffset] + // + // Here, Rx is an arbitrary register, "UpperBound" and "BaseOffset" are constants. + // The above pattern is assumed to be generated by the compiler before BRX, + // as the instruction is usually used to implement jump tables for switch statement optimizations. + // On a successful match, "BaseOffset" is the offset in bytes where the jump offsets are + // located on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1. 
+ + HashSet<Block> visited = new HashSet<Block>(); + + var ldcLocation = FindFirstRegWrite(visited, new BlockLocation(block, block.OpCodes.Count - 1), brxReg); + if (ldcLocation.Block == null || ldcLocation.Block.OpCodes[ldcLocation.Index].Name != InstName.Ldc) + { + return (0, 0); + } + + GetOp<InstLdc>(ldcLocation, out var opLdc); + + if (opLdc.CbufSlot != 1 || opLdc.AddressMode != 0) + { + return (0, 0); + } + + var shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.SrcA); + if (shlLocation.Block == null || !shlLocation.IsImmInst(InstName.Shl)) + { + return (0, 0); + } + + GetOp<InstShlI>(shlLocation, out var opShl); + + if (opShl.Imm20 != 2) + { + return (0, 0); + } + + var imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.SrcA); + if (imnmxLocation.Block == null || !imnmxLocation.IsImmInst(InstName.Imnmx)) + { + return (0, 0); + } + + GetOp<InstImnmxI>(imnmxLocation, out var opImnmx); + + if (opImnmx.Signed || opImnmx.SrcPred != RegisterConsts.PredicateTrueIndex || opImnmx.SrcPredInv) + { + return (0, 0); + } + + return (opLdc.CbufOffset, opImnmx.Imm20 + 1); + } + + private static void GetOp<T>(BlockLocation location, out T op) where T : unmanaged + { + ulong rawOp = location.Block.OpCodes[location.Index].RawOpCode; + op = Unsafe.As<ulong, T>(ref rawOp); + } + + private readonly struct BlockLocation + { + public Block Block { get; } + public int Index { get; } + + public BlockLocation(Block block, int index) + { + Block = block; + Index = index; + } + + public bool IsImmInst(InstName name) + { + InstOp op = Block.OpCodes[Index]; + return op.Name == name && op.Props.HasFlag(InstProps.Ib); + } + } + + private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex) + { + Queue<BlockLocation> toVisit = new Queue<BlockLocation>(); + toVisit.Enqueue(location); + visited.Add(location.Block); + + while (toVisit.TryDequeue(out var currentLocation)) + { + Block block = currentLocation.Block; + for (int i 
= currentLocation.Index - 1; i >= 0; i--) + { + if (WritesToRegister(block.OpCodes[i], regIndex)) + { + return new BlockLocation(block, i); + } + } + + foreach (Block predecessor in block.Predecessors) + { + if (visited.Add(predecessor)) + { + toVisit.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count)); + } + } + } + + return new BlockLocation(null, 0); + } + + private static bool WritesToRegister(InstOp op, int regIndex) + { + // Predicate instruction only ever writes to predicate, so we shouldn't check those. + if ((op.Props & (InstProps.Rd | InstProps.Rd2)) == 0) + { + return false; + } + + if (op.Props.HasFlag(InstProps.Rd2) && (byte)(op.RawOpCode >> 28) == regIndex) + { + return true; + } + + return (byte)op.RawOpCode == regIndex; + } + + private enum MergeType + { + Brk, + Cont, + Sync + } + + private struct PathBlockState + { + public Block Block { get; } + + private enum RestoreType + { + None, + PopPushOp, + PushBranchOp + } + + private RestoreType _restoreType; + + private ulong _restoreValue; + private MergeType _restoreMergeType; + + public bool ReturningFromVisit => _restoreType != RestoreType.None; + + public PathBlockState(Block block) + { + Block = block; + _restoreType = RestoreType.None; + _restoreValue = 0; + _restoreMergeType = default; + } + + public PathBlockState(int oldStackSize) + { + Block = null; + _restoreType = RestoreType.PopPushOp; + _restoreValue = (ulong)oldStackSize; + _restoreMergeType = default; + } + + public PathBlockState(ulong syncAddress, MergeType mergeType) + { + Block = null; + _restoreType = RestoreType.PushBranchOp; + _restoreValue = syncAddress; + _restoreMergeType = mergeType; + } + + public void RestoreStackState(Stack<(ulong, MergeType)> branchStack) + { + if (_restoreType == RestoreType.PushBranchOp) + { + branchStack.Push((_restoreValue, _restoreMergeType)); + } + else if (_restoreType == RestoreType.PopPushOp) + { + while (branchStack.Count > (uint)_restoreValue) + { + branchStack.Pop(); + } + } + 
} + } + + private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex) + { + PushOpInfo pushOpInfo = currBlock.PushOpCodes[pushOpIndex]; + InstOp pushOp = pushOpInfo.Op; + + Block target = blocks[pushOp.GetAbsoluteAddress()]; + + Stack<PathBlockState> workQueue = new Stack<PathBlockState>(); + HashSet<Block> visited = new HashSet<Block>(); + Stack<(ulong, MergeType)> branchStack = new Stack<(ulong, MergeType)>(); + + void Push(PathBlockState pbs) + { + // When block is null, this means we are pushing a restore operation. + // Restore operations are used to undo the work done inside a block + // when we return from it, for example it pops addresses pushed by + // SSY/PBK instructions inside the block, and pushes addresses poped + // by SYNC/BRK. + // For blocks, if it's already visited, we just ignore to avoid going + // around in circles and getting stuck here. + if (pbs.Block == null || !visited.Contains(pbs.Block)) + { + workQueue.Push(pbs); + } + } + + Push(new PathBlockState(currBlock)); + + while (workQueue.TryPop(out PathBlockState pbs)) + { + if (pbs.ReturningFromVisit) + { + pbs.RestoreStackState(branchStack); + + continue; + } + + Block current = pbs.Block; + + // If the block was already processed, we just ignore it, otherwise + // we would push the same child blocks of an already processed block, + // and go around in circles until memory is exhausted. 
+ if (!visited.Add(current)) + { + continue; + } + + int pushOpsCount = current.PushOpCodes.Count; + if (pushOpsCount != 0) + { + Push(new PathBlockState(branchStack.Count)); + + for (int index = pushOpIndex; index < pushOpsCount; index++) + { + InstOp currentPushOp = current.PushOpCodes[index].Op; + MergeType pushMergeType = GetMergeTypeFromPush(currentPushOp.Name); + branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType)); + } + } + + pushOpIndex = 0; + + bool hasNext = current.HasNext(); + if (hasNext) + { + Push(new PathBlockState(current.Successors[0])); + } + + InstOp lastOp = current.GetLastOp(); + if (IsPopBranch(lastOp.Name)) + { + MergeType popMergeType = GetMergeTypeFromPop(lastOp.Name); + + bool found = true; + ulong targetAddress = 0UL; + MergeType mergeType; + + do + { + if (branchStack.Count == 0) + { + found = false; + break; + } + + (targetAddress, mergeType) = branchStack.Pop(); + + // Push the target address (this will be used to push the address + // back into the PBK/PCNT/SSY stack when we return from that block), + Push(new PathBlockState(targetAddress, mergeType)); + } + while (mergeType != popMergeType); + + // Make sure we found the correct address, + // the push and pop instruction types must match, so: + // - BRK can only consume addresses pushed by PBK. + // - CONT can only consume addresses pushed by PCNT. + // - SYNC can only consume addresses pushed by SSY. + if (found) + { + if (branchStack.Count == 0) + { + // If the entire stack was consumed, then the current pop instruction + // just consumed the address from our push instruction. + if (current.SyncTargets.TryAdd(pushOp.Address, new SyncTarget(pushOpInfo, current.SyncTargets.Count))) + { + pushOpInfo.Consumers.Add(current, Local()); + target.Predecessors.Add(current); + current.Successors.Add(target); + } + } + else + { + // Push the block itself into the work queue for processing. 
+ Push(new PathBlockState(blocks[targetAddress])); + } + } + } + else + { + // By adding them in descending order (sorted by address), we process the blocks + // in order (of ascending address), since we work with a LIFO. + foreach (Block possibleTarget in current.Successors.OrderByDescending(x => x.Address)) + { + if (!hasNext || possibleTarget != current.Successors[0]) + { + Push(new PathBlockState(possibleTarget)); + } + } + } + } + } + + public static bool IsPopBranch(InstName name) + { + return name == InstName.Brk || name == InstName.Cont || name == InstName.Sync; + } + + private static MergeType GetMergeTypeFromPush(InstName name) + { + return name switch + { + InstName.Pbk => MergeType.Brk, + InstName.Pcnt => MergeType.Cont, + _ => MergeType.Sync + }; + } + + private static MergeType GetMergeTypeFromPop(InstName name) + { + return name switch + { + InstName.Brk => MergeType.Brk, + InstName.Cont => MergeType.Cont, + _ => MergeType.Sync + }; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Decoders/FunctionType.cs b/src/Ryujinx.Graphics.Shader/Decoders/FunctionType.cs new file mode 100644 index 00000000..6ea6a82a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Decoders/FunctionType.cs @@ -0,0 +1,10 @@ +namespace Ryujinx.Graphics.Shader.Decoders +{ + enum FunctionType : byte + { + User, + Unused, + BuiltInFSIBegin, + BuiltInFSIEnd + } +} diff --git a/src/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs b/src/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs new file mode 100644 index 00000000..0c22ddc0 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs @@ -0,0 +1,5383 @@ +namespace Ryujinx.Graphics.Shader.Decoders +{ + enum AlSize + { + _32 = 0, + _64 = 1, + _96 = 2, + _128 = 3, + } + + enum AtomSize + { + U32 = 0, + S32 = 1, + U64 = 2, + F32FtzRn = 3, + F16x2FtzRn = 4, + S64 = 5, + } + + enum AtomOp + { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, + Safeadd = 10, + } + + enum AtomsSize + { + U32 = 0, + S32 = 1, + U64 = 2, + S64 = 3, + } + + enum BarMode + { + Bar = 0, + Result = 1, + Warp = 2, + } + + enum BarOp + { + Sync = 0, + Arv = 1, + Red = 2, + Scan = 3, + SyncAll = 4, + } + + enum BarRedOp + { + Popc = 0, + And = 1, + Or = 2, + } + + enum Bpt + { + DrainIllegal = 0, + Cal = 1, + Pause = 2, + Trap = 3, + Int = 4, + Drain = 5, + } + + enum Ccc + { + F = 0, + Lt = 1, + Eq = 2, + Le = 3, + Gt = 4, + Ne = 5, + Ge = 6, + Num = 7, + Nan = 8, + Ltu = 9, + Equ = 10, + Leu = 11, + Gtu = 12, + Neu = 13, + Geu = 14, + T = 15, + Off = 16, + Lo = 17, + Sff = 18, + Ls = 19, + Hi = 20, + Sft = 21, + Hs = 22, + Oft = 23, + CsmTa = 24, + CsmTr = 25, + CsmMx = 26, + FcsmTa = 27, + FcsmTr = 28, + FcsmMx = 29, + Rle = 30, + Rgt = 31, + } + + enum CacheType + { + U = 1, + C = 2, + I = 3, + Crs = 4, + } + + enum CctlOp + { + Pf1 = 1, + Pf1_5 = 2, + Pf2 = 3, + Wb = 4, + Iv = 5, + Ivall = 6, + Rs = 7, + Rslb = 9, + } + + enum 
CctltOp + { + Ivth = 1, + } + + enum BoolOp + { + And = 0, + Or = 1, + Xor = 2, + } + + enum SReg + { + LaneId = 0, + Clock = 1, + VirtCfg = 2, + VirtId = 3, + Pm0 = 4, + Pm1 = 5, + Pm2 = 6, + Pm3 = 7, + Pm4 = 8, + Pm5 = 9, + Pm6 = 10, + Pm7 = 11, + OrderingTicket = 15, + PrimType = 16, + InvocationId = 17, + YDirection = 18, + ThreadKill = 19, + ShaderType = 20, + DirectCbeWriteAddressLow = 21, + DirectCbeWriteAddressHigh = 22, + DirectCbeWriteEnabled = 23, + MachineId0 = 24, + MachineId1 = 25, + MachineId2 = 26, + MachineId3 = 27, + Affinity = 28, + InvocationInfo = 29, + WScaleFactorXY = 30, + WScaleFactorZ = 31, + TId = 32, + TIdX = 33, + TIdY = 34, + TIdZ = 35, + CtaParam = 36, + CtaIdX = 37, + CtaIdY = 38, + CtaIdZ = 39, + Ntid = 40, + CirQueueIncrMinusOne = 41, + Nlatc = 42, + Swinlo = 48, + Swinsz = 49, + Smemsz = 50, + Smembanks = 51, + LWinLo = 52, + LWinSz = 53, + LMemLoSz = 54, + LMemHiOff = 55, + EqMask = 56, + LtMask = 57, + LeMask = 58, + GtMask = 59, + GeMask = 60, + RegAlloc = 61, + CtxAddr = 62, + GlobalErrorStatus = 64, + WarpErrorStatus = 66, + WarpErrorStatusClear = 67, + PmHi0 = 72, + PmHi1 = 73, + PmHi2 = 74, + PmHi3 = 75, + PmHi4 = 76, + PmHi5 = 77, + PmHi6 = 78, + PmHi7 = 79, + ClockLo = 80, + ClockHi = 81, + GlobalTimerLo = 82, + GlobalTimerHi = 83, + HwTaskId = 96, + CircularQueueEntryIndex = 97, + CircularQueueEntryAddressLow = 98, + CircularQueueEntryAddressHigh = 99, + } + + enum RoundMode + { + Rn = 0, + Rm = 1, + Rp = 2, + Rz = 3, + } + + enum FComp + { + F = 0, + Lt = 1, + Eq = 2, + Le = 3, + Gt = 4, + Ne = 5, + Ge = 6, + Num = 7, + Nan = 8, + Ltu = 9, + Equ = 10, + Leu = 11, + Gtu = 12, + Neu = 13, + Geu = 14, + T = 15, + } + + enum IntegerRound + { + Pass = 1, + Round = 4, + Floor = 5, + Ceil = 6, + Trunc = 7, + } + + enum IDstFmt + { + U16 = 1, + U32 = 2, + U64 = 3, + S16 = 5, + S32 = 6, + S64 = 7, + } + + enum ISrcFmt + { + U8 = 0, + U16 = 1, + U32 = 2, + U64 = 3, + S8 = 4, + S16 = 5, + S32 = 6, + S64 = 7, + } + + enum 
ISrcDstFmt + { + U8 = 0, + U16 = 1, + U32 = 2, + S8 = 4, + S16 = 5, + S32 = 6, + } + + enum RoundMode2 + { + Round = 0, + Floor = 1, + Ceil = 2, + Trunc = 3, + } + + enum ChkModeF + { + Divide = 0, + } + + enum Fmz + { + Ftz = 1, + Fmz = 2, + } + + enum MultiplyScale + { + NoScale = 0, + D2 = 1, + D4 = 2, + D8 = 3, + M8 = 4, + M4 = 5, + M2 = 6, + } + + enum OFmt + { + F16 = 0, + F32 = 1, + MrgH0 = 2, + MrgH1 = 3, + } + + enum HalfSwizzle + { + F16 = 0, + F32 = 1, + H0H0 = 2, + H1H1 = 3, + } + + enum ByteSel + { + B0 = 0, + B1 = 1, + B2 = 2, + B3 = 3, + } + + enum DstFmt + { + F16 = 1, + F32 = 2, + F64 = 3, + } + + enum AvgMode + { + NoNeg = 0, + NegB = 1, + NegA = 2, + PlusOne = 3, + } + + enum Lrs + { + None = 0, + RightShift = 1, + LeftShift = 2, + } + + enum HalfSelect + { + B32 = 0, + H0 = 1, + H1 = 2, + } + + enum IComp + { + F = 0, + Lt = 1, + Eq = 2, + Le = 3, + Gt = 4, + Ne = 5, + Ge = 6, + T = 7, + } + + enum XMode + { + Xlo = 1, + Xmed = 2, + Xhi = 3, + } + + enum IpaOp + { + Pass = 0, + Multiply = 1, + Constant = 2, + Sc = 3, + } + + enum IBase + { + Patch = 1, + Prim = 2, + Attr = 3, + } + + enum CacheOpLd + { + Ca = 0, + Cg = 1, + Ci = 2, + Cv = 3, + } + + enum CacheOpSt + { + Wb = 0, + Cg = 1, + Ci = 2, + Wt = 3, + } + + enum LsSize + { + U8 = 0, + S8 = 1, + U16 = 2, + S16 = 3, + B32 = 4, + B64 = 5, + B128 = 6, + UB128 = 7, + } + + enum LsSize2 + { + U8 = 0, + S8 = 1, + U16 = 2, + S16 = 3, + B32 = 4, + B64 = 5, + B128 = 6, + } + + enum AddressMode + { + Il = 1, + Is = 2, + Isl = 3, + } + + enum CacheOp2 + { + Lu = 1, + Ci = 2, + Cv = 3, + } + + enum PredicateOp + { + F = 0, + T = 1, + Z = 2, + Nz = 3, + } + + enum LogicOp + { + And = 0, + Or = 1, + Xor = 2, + PassB = 3, + } + + enum Membar + { + Cta = 0, + Gl = 1, + Sys = 2, + Vc = 3, + } + + enum Ivall + { + Ivalld = 1, + Ivallt = 2, + Ivalltd = 3, + } + + enum MufuOp + { + Cos = 0, + Sin = 1, + Ex2 = 2, + Lg2 = 3, + Rcp = 4, + Rsq = 5, + Rcp64h = 6, + Rsq64h = 7, + Sqrt = 8, + } + + enum OutType + { 
+ Emit = 1, + Cut = 2, + EmitThenCut = 3, + } + + enum PixMode + { + Covmask = 1, + Covered = 2, + Offset = 3, + CentroidOffset = 4, + MyIndex = 5, + } + + enum PMode + { + F4e = 1, + B4e = 2, + Rc8 = 3, + Ecl = 4, + Ecr = 5, + Rc16 = 6, + } + + enum RedOp + { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + } + + enum XModeShf + { + Hi = 1, + X = 2, + Xhi = 3, + } + + enum MaxShift + { + U64 = 2, + S64 = 3, + } + + enum ShflMode + { + Idx = 0, + Up = 1, + Down = 2, + Bfly = 3, + } + + enum Clamp + { + Ign = 0, + Trap = 2, + } + + enum SuatomSize + { + U32 = 0, + S32 = 1, + U64 = 2, + F32FtzRn = 3, + F16x2FtzRn = 4, + S64 = 5, + Sd32 = 6, + Sd64 = 7, + } + + enum SuDim + { + _1d = 0, + _1dBuffer = 1, + _1dArray = 2, + _2d = 3, + _2dArray = 4, + _3d = 5, + } + + enum SuatomOp + { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, + } + + enum SuSize + { + U8 = 0, + S8 = 1, + U16 = 2, + S16 = 3, + B32 = 4, + B64 = 5, + B128 = 6, + UB128 = 7, + } + + enum SuRgba + { + R = 1, + G = 2, + Rg = 3, + B = 4, + Rb = 5, + Gb = 6, + Rgb = 7, + A = 8, + Ra = 9, + Ga = 10, + Rga = 11, + Ba = 12, + Rba = 13, + Gba = 14, + Rgba = 15, + } + + enum Lod + { + Lz = 1, + Lb = 2, + Ll = 3, + Lba = 6, + Lla = 7, + } + + enum TexDim + { + _1d = 0, + Array1d = 1, + _2d = 2, + Array2d = 3, + _3d = 4, + Array3d = 5, + Cube = 6, + ArrayCube = 7, + } + + enum TexsTarget + { + Texture1DLodZero = 0, + Texture2D = 1, + Texture2DLodZero = 2, + Texture2DLodLevel = 3, + Texture2DDepthCompare = 4, + Texture2DLodLevelDepthCompare = 5, + Texture2DLodZeroDepthCompare = 6, + Texture2DArray = 7, + Texture2DArrayLodZero = 8, + Texture2DArrayLodZeroDepthCompare = 9, + Texture3D = 10, + Texture3DLodZero = 11, + TextureCube = 12, + TextureCubeLodLevel = 13, + } + + enum TldsTarget + { + Texture1DLodZero = 0x0, + Texture1DLodLevel = 0x1, + Texture2DLodZero = 0x2, + Texture2DLodZeroOffset = 0x4, + Texture2DLodLevel = 0x5, + 
Texture2DLodZeroMultisample = 0x6, + Texture3DLodZero = 0x7, + Texture2DArrayLodZero = 0x8, + Texture2DLodLevelOffset = 0xc + } + + enum TexComp + { + R = 0, + G = 1, + B = 2, + A = 3, + } + + enum TexOffset + { + None = 0, + Aoffi = 1, + Ptp = 2, + } + + enum TexQuery + { + TexHeaderDimension = 1, + TexHeaderTextureType = 2, + TexHeaderSamplerPos = 5, + TexSamplerFilter = 16, + TexSamplerLod = 18, + TexSamplerWrap = 20, + TexSamplerBorderColor = 22, + } + + enum VectorSelect + { + U8B0 = 0, + U8B1 = 1, + U8B2 = 2, + U8B3 = 3, + U16H0 = 4, + U16H1 = 5, + U32 = 6, + S8B0 = 8, + S8B1 = 9, + S8B2 = 10, + S8B3 = 11, + S16H0 = 12, + S16H1 = 13, + S32 = 14, + } + + enum VideoOp + { + Mrg16h = 0, + Mrg16l = 1, + Mrg8b0 = 2, + Mrg8b2 = 3, + Acc = 4, + Min = 5, + Max = 6, + } + + enum VideoRed + { + Acc = 1, + } + + enum LaneMask4 + { + Z = 1, + W = 2, + Zw = 3, + X = 4, + Xz = 5, + Xw = 6, + Xzw = 7, + Y = 8, + Yz = 9, + Yw = 10, + Yzw = 11, + Xy = 12, + Xyz = 13, + Xyw = 14, + Xyzw = 15, + } + + enum ASelect4 + { + _0000 = 0, + _1111 = 1, + _2222 = 2, + _3333 = 3, + _3210 = 4, + _5432 = 6, + _6543 = 7, + _3201 = 8, + _3012 = 9, + _0213 = 10, + _3120 = 11, + _1230 = 12, + _2310 = 13, + } + + enum BSelect4 + { + _4444 = 0, + _5555 = 1, + _6666 = 2, + _7777 = 3, + _7654 = 4, + _5432 = 6, + _4321 = 7, + _4567 = 8, + _6745 = 9, + _5476 = 10, + } + + enum VideoScale + { + Shr7 = 1, + Shr15 = 2, + } + + enum VoteMode + { + All = 0, + Any = 1, + Eq = 2, + } + + enum XmadCop + { + Cfull = 0, + Clo = 1, + Chi = 2, + Csfu = 3, + Cbcc = 4, + } + + enum XmadCop2 + { + Cfull = 0, + Clo = 1, + Chi = 2, + Csfu = 3, + } + + enum ImadspASelect + { + U32 = 0, + S32 = 1, + U24 = 2, + S24 = 3, + U16h0 = 4, + S16h0 = 5, + U16h1 = 6, + S16h1 = 7, + } + + enum ImadspBSelect + { + U24 = 0, + S24 = 1, + U16h0 = 2, + S16h0 = 3, + } + + struct InstConditional + { + private ulong _opcode; + public InstConditional(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 16) & 0x7); + 
public bool PredInv => (_opcode & 0x80000) != 0; + public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F); + } + + struct InstAl2p + { + private ulong _opcode; + public InstAl2p(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public AlSize AlSize => (AlSize)((_opcode >> 47) & 0x3); + public bool Aio => (_opcode & 0x100000000) != 0; + public int Imm11 => (int)((_opcode >> 20) & 0x7FF); + public int DestPred => (int)((_opcode >> 44) & 0x7); + } + + struct InstAld + { + private ulong _opcode; + public InstAld(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm11 => (int)((_opcode >> 20) & 0x7FF); + public bool P => (_opcode & 0x80000000) != 0; + public bool O => (_opcode & 0x100000000) != 0; + public AlSize AlSize => (AlSize)((_opcode >> 47) & 0x3); + public bool Phys => !P && Imm11 == 0 && SrcA != RegisterConsts.RegisterZeroIndex; + } + + struct InstAst + { + private ulong _opcode; + public InstAst(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 0) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm11 => (int)((_opcode >> 20) & 0x7FF); + public bool P => (_opcode & 0x80000000) != 0; + public AlSize AlSize => (AlSize)((_opcode >> 47) & 0x3); + public bool Phys => !P && Imm11 == 0 && SrcA != RegisterConsts.RegisterZeroIndex; + } + + struct InstAtom + { + private ulong _opcode; + public InstAtom(ulong opcode) => _opcode = opcode; + 
public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm20 => (int)((_opcode >> 28) & 0xFFFFF); + public AtomSize Size => (AtomSize)((_opcode >> 49) & 0x7); + public AtomOp Op => (AtomOp)((_opcode >> 52) & 0xF); + public bool E => (_opcode & 0x1000000000000) != 0; + } + + struct InstAtomCas + { + private ulong _opcode; + public InstAtomCas(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int BcRz => (int)((_opcode >> 50) & 0x3); + public bool E => (_opcode & 0x1000000000000) != 0; + } + + struct InstAtoms + { + private ulong _opcode; + public InstAtoms(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm22 => (int)((_opcode >> 30) & 0x3FFFFF); + public AtomsSize AtomsSize => (AtomsSize)((_opcode >> 28) & 0x3); + public AtomOp AtomOp => (AtomOp)((_opcode >> 52) & 0xF); + } + + struct InstAtomsCas + { + private ulong _opcode; + public InstAtomsCas(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int AtomsBcRz => (int)((_opcode >> 28) & 0x3); + } + + struct InstB2r + { + private ulong _opcode; + public InstB2r(ulong 
opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int DestPred => (int)((_opcode >> 45) & 0x7); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public BarMode Mode => (BarMode)((_opcode >> 32) & 0x3); + } + + struct InstBar + { + private ulong _opcode; + public InstBar(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm12 => (int)((_opcode >> 20) & 0xFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public BarOp BarOp => (BarOp)((_opcode >> 32) & 0x7); + public BarRedOp BarRedOp => (BarRedOp)((_opcode >> 35) & 0x3); + public bool AFixBar => (_opcode & 0x100000000000) != 0; + public bool BFixBar => (_opcode & 0x80000000000) != 0; + } + + struct InstBfeR + { + private ulong _opcode; + public InstBfeR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Signed => (_opcode & 0x1000000000000) != 0; + public bool Brev => (_opcode & 0x10000000000) != 0; + } + + struct InstBfeI + { + private ulong _opcode; + public InstBfeI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; 
+ public bool Signed => (_opcode & 0x1000000000000) != 0; + public bool Brev => (_opcode & 0x10000000000) != 0; + } + + struct InstBfeC + { + private ulong _opcode; + public InstBfeC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Signed => (_opcode & 0x1000000000000) != 0; + public bool Brev => (_opcode & 0x10000000000) != 0; + } + + struct InstBfiR + { + private ulong _opcode; + public InstBfiR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + } + + struct InstBfiI + { + private ulong _opcode; + public InstBfiI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + } + + struct InstBfiC + { + private ulong _opcode; + public InstBfiC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 
20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + } + + struct InstBfiRc + { + private ulong _opcode; + public InstBfiRc(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + } + + struct InstBpt + { + private ulong _opcode; + public InstBpt(ulong opcode) => _opcode = opcode; + public int Imm20 => (int)((_opcode >> 20) & 0xFFFFF); + public Bpt Bpt => (Bpt)((_opcode >> 6) & 0x7); + } + + struct InstBra + { + private ulong _opcode; + public InstBra(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F); + public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF); + public bool Ca => (_opcode & 0x20) != 0; + public bool Lmt => (_opcode & 0x40) != 0; + public bool U => (_opcode & 0x80) != 0; + } + + struct InstBrk + { + private ulong _opcode; + public InstBrk(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F); + } + + struct InstBrx + { + private ulong _opcode; + public InstBrx(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F); + public int Imm24 
=> (int)((_opcode >> 20) & 0xFFFFFF); + public bool Ca => (_opcode & 0x20) != 0; + public bool Lmt => (_opcode & 0x40) != 0; + } + + struct InstCal + { + private ulong _opcode; + public InstCal(ulong opcode) => _opcode = opcode; + public bool Ca => (_opcode & 0x20) != 0; + public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF); + public bool Inc => (_opcode & 0x40) != 0; + } + + struct InstCctl + { + private ulong _opcode; + public InstCctl(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm30 => (int)((_opcode >> 22) & 0x3FFFFFFF); + public bool E => (_opcode & 0x10000000000000) != 0; + public CacheType Cache => (CacheType)((_opcode >> 4) & 0x7); + public CctlOp CctlOp => (CctlOp)((_opcode >> 0) & 0xF); + } + + struct InstCctll + { + private ulong _opcode; + public InstCctll(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm22 => (int)((_opcode >> 22) & 0x3FFFFF); + public int Cache => (int)((_opcode >> 4) & 0x3); + public CctlOp CctlOp => (CctlOp)((_opcode >> 0) & 0xF); + } + + struct InstCctlt + { + private ulong _opcode; + public InstCctlt(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int TsIdx13 => (int)((_opcode >> 36) & 0x1FFF); + public CctltOp CctltOp => (CctltOp)((_opcode >> 0) & 0x3); + } + + struct InstCctltR + { + private ulong _opcode; + public InstCctltR(ulong opcode) => _opcode = opcode; + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public CctltOp CctltOp => (CctltOp)((_opcode >> 0) & 0x3); + } + + struct InstCont + { + private ulong _opcode; + public InstCont(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 16) & 
0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F); + } + + struct InstCset + { + private ulong _opcode; + public InstCset(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public Ccc Ccc => (Ccc)((_opcode >> 8) & 0x1F); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public bool BVal => (_opcode & 0x100000000000) != 0; + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + } + + struct InstCsetp + { + private ulong _opcode; + public InstCsetp(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public Ccc Ccc => (Ccc)((_opcode >> 8) & 0x1F); + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int DestPredInv => (int)((_opcode >> 0) & 0x7); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + } + + struct InstCs2r + { + private ulong _opcode; + public InstCs2r(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public SReg SReg => (SReg)((_opcode >> 20) & 0xFF); + } + + struct InstDaddR + { + private ulong _opcode; + public InstDaddR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => 
(_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + } + + struct InstDaddI + { + private ulong _opcode; + public InstDaddI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + } + + struct InstDaddC + { + private ulong _opcode; + public InstDaddC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + } + + struct InstDepbar + { + private ulong _opcode; + public InstDepbar(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 
16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool Le => (_opcode & 0x20000000) != 0; + public int Sbid => (int)((_opcode >> 26) & 0x7); + public int PendCnt => (int)((_opcode >> 20) & 0x3F); + public int Imm6 => (int)((_opcode >> 0) & 0x3F); + } + + struct InstDfmaR + { + private ulong _opcode; + public InstDfmaR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 50) & 0x3); + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + } + + struct InstDfmaI + { + private ulong _opcode; + public InstDfmaI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 50) & 0x3); + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + } + + struct InstDfmaC + { + private ulong _opcode; + public InstDfmaC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + 
public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 50) & 0x3); + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + } + + struct InstDfmaRc + { + private ulong _opcode; + public InstDfmaRc(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 50) & 0x3); + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + } + + struct InstDmnmxR + { + private ulong _opcode; + public InstDmnmxR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstDmnmxI + { + private ulong _opcode; + public InstDmnmxI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => 
(int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstDmnmxC + { + private ulong _opcode; + public InstDmnmxC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstDmulR + { + private ulong _opcode; + public InstDmulR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + public bool NegA => (_opcode & 0x1000000000000) != 0; + } + + 
struct InstDmulI + { + private ulong _opcode; + public InstDmulI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + public bool NegA => (_opcode & 0x1000000000000) != 0; + } + + struct InstDmulC + { + private ulong _opcode; + public InstDmulC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + public bool NegA => (_opcode & 0x1000000000000) != 0; + } + + struct InstDsetR + { + private ulong _opcode; + public InstDsetR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsA => (_opcode & 0x40000000000000) != 0; + public bool NegB => (_opcode & 0x20000000000000) != 0; + public bool BVal => (_opcode & 0x10000000000000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool AbsB => (_opcode & 0x100000000000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 
0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstDsetI + { + private ulong _opcode; + public InstDsetI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsA => (_opcode & 0x40000000000000) != 0; + public bool NegB => (_opcode & 0x20000000000000) != 0; + public bool BVal => (_opcode & 0x10000000000000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool AbsB => (_opcode & 0x100000000000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstDsetC + { + private ulong _opcode; + public InstDsetC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsA => (_opcode & 0x40000000000000) != 0; + public bool NegB => (_opcode & 0x20000000000000) != 0; + public bool BVal => (_opcode & 0x10000000000000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool AbsB => (_opcode & 0x100000000000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 0; + public int SrcPred => 
(int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstDsetpR + { + private ulong _opcode; + public InstDsetpR(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool AbsB => (_opcode & 0x100000000000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 0; + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool AbsA => (_opcode & 0x80) != 0; + public bool NegB => (_opcode & 0x40) != 0; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int DestPredInv => (int)((_opcode >> 0) & 0x7); + } + + struct InstDsetpI + { + private ulong _opcode; + public InstDsetpI(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool AbsB => (_opcode & 0x100000000000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 0; + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool AbsA => (_opcode & 0x80) != 0; + public bool NegB => (_opcode & 0x40) != 0; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int DestPredInv => (int)((_opcode >> 0) & 0x7); + } + + struct InstDsetpC + { + private ulong _opcode; + public InstDsetpC(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + 
public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool AbsB => (_opcode & 0x100000000000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 0; + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool AbsA => (_opcode & 0x80) != 0; + public bool NegB => (_opcode & 0x40) != 0; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int DestPredInv => (int)((_opcode >> 0) & 0x7); + } + + struct InstExit + { + private ulong _opcode; + public InstExit(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F); + public bool KeepRefCnt => (_opcode & 0x20) != 0; + } + + struct InstF2fR + { + private ulong _opcode; + public InstF2fR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public bool Ftz => (_opcode & 0x100000000000) != 0; + public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3); + public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3); + public IntegerRound RoundMode => (IntegerRound)((int)((_opcode >> 40) & 0x4) | (int)((_opcode >> 39) & 0x3)); + public bool Sh => (_opcode & 0x20000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + } + + struct InstF2fI + { + private ulong _opcode; + public 
InstF2fI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public bool Ftz => (_opcode & 0x100000000000) != 0; + public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3); + public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3); + public IntegerRound RoundMode => (IntegerRound)((int)((_opcode >> 40) & 0x4) | (int)((_opcode >> 39) & 0x3)); + public bool Sh => (_opcode & 0x20000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + } + + struct InstF2fC + { + private ulong _opcode; + public InstF2fC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public bool Ftz => (_opcode & 0x100000000000) != 0; + public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3); + public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3); + public IntegerRound RoundMode => (IntegerRound)((int)((_opcode >> 40) & 0x4) | (int)((_opcode >> 39) & 0x3)); + public bool Sh => (_opcode & 0x20000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + } + + struct InstF2iR + { + private ulong _opcode; + public InstF2iR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public 
int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public bool Ftz => (_opcode & 0x100000000000) != 0; + public bool Sh => (_opcode & 0x20000000000) != 0; + public IDstFmt IDstFmt => (IDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3)); + public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3); + public RoundMode2 RoundMode => (RoundMode2)((_opcode >> 39) & 0x3); + } + + struct InstF2iI + { + private ulong _opcode; + public InstF2iI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public bool Ftz => (_opcode & 0x100000000000) != 0; + public bool Sh => (_opcode & 0x20000000000) != 0; + public IDstFmt IDstFmt => (IDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3)); + public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3); + public RoundMode2 RoundMode => (RoundMode2)((_opcode >> 39) & 0x3); + } + + struct InstF2iC + { + private ulong _opcode; + public InstF2iC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + 
public bool Ftz => (_opcode & 0x100000000000) != 0; + public bool Sh => (_opcode & 0x20000000000) != 0; + public IDstFmt IDstFmt => (IDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3)); + public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3); + public RoundMode2 RoundMode => (RoundMode2)((_opcode >> 39) & 0x3); + } + + struct InstFaddR + { + private ulong _opcode; + public InstFaddR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public bool Ftz => (_opcode & 0x100000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + } + + struct InstFaddI + { + private ulong _opcode; + public InstFaddI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public bool Ftz => (_opcode & 0x100000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode 
>> 39) & 0x3); + } + + struct InstFaddC + { + private ulong _opcode; + public InstFaddC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public bool Ftz => (_opcode & 0x100000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + } + + struct InstFadd32i + { + private ulong _opcode; + public InstFadd32i(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x10000000000000) != 0; + public int Imm32 => (int)(_opcode >> 20); + public bool AbsB => (_opcode & 0x200000000000000) != 0; + public bool NegA => (_opcode & 0x100000000000000) != 0; + public bool Ftz => (_opcode & 0x80000000000000) != 0; + public bool AbsA => (_opcode & 0x40000000000000) != 0; + public bool NegB => (_opcode & 0x20000000000000) != 0; + } + + struct InstFchkR + { + private ulong _opcode; + public InstFchkR(ulong opcode) => _opcode = opcode; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool AbsB => 
(_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ChkModeF ChkModeF => (ChkModeF)((_opcode >> 39) & 0x3F); + } + + struct InstFchkI + { + private ulong _opcode; + public InstFchkI(ulong opcode) => _opcode = opcode; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ChkModeF ChkModeF => (ChkModeF)((_opcode >> 39) & 0x3F); + } + + struct InstFchkC + { + private ulong _opcode; + public InstFchkC(ulong opcode) => _opcode = opcode; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool AbsA => (_opcode & 0x400000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ChkModeF ChkModeF => (ChkModeF)((_opcode >> 39) & 0x3F); + } + + struct InstFcmpR + { + private ulong _opcode; + public InstFcmpR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + 
public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public bool Ftz => (_opcode & 0x800000000000) != 0; + } + + struct InstFcmpI + { + private ulong _opcode; + public InstFcmpI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public bool Ftz => (_opcode & 0x800000000000) != 0; + } + + struct InstFcmpC + { + private ulong _opcode; + public InstFcmpC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public bool Ftz => (_opcode & 0x800000000000) != 0; + } + + struct InstFcmpRc + { + private ulong _opcode; + public InstFcmpRc(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public FComp FComp => (FComp)((_opcode >> 48) & 0xF); + public bool Ftz => (_opcode & 0x800000000000) != 0; + } + + struct InstFfmaR + { + private 
ulong _opcode; + public InstFfmaR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 51) & 0x3); + public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3); + } + + struct InstFfmaI + { + private ulong _opcode; + public InstFfmaI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool NegA => (_opcode & 0x1000000000000) != 0; + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public RoundMode RoundMode => (RoundMode)((_opcode >> 51) & 0x3); + public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3); + } + + struct InstFfmaC + { + private ulong _opcode; + public InstFfmaC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + 
public bool WriteCC => (_opcode & 0x800000000000) != 0; // remaining members of struct InstFfmaC, whose header is on the previous line
+        public bool NegA => (_opcode & 0x1000000000000) != 0;
+        public bool NegC => (_opcode & 0x2000000000000) != 0;
+        public bool Sat => (_opcode & 0x4000000000000) != 0;
+        public RoundMode RoundMode => (RoundMode)((_opcode >> 51) & 0x3);
+        public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+    }
+
+    struct InstFfmaRc // Field accessors over one 64-bit opcode word kept in _opcode; exposes SrcC together with CbufSlot/CbufOffset.
+    {
+        private ulong _opcode;
+        public InstFfmaRc(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool NegA => (_opcode & 0x1000000000000) != 0;
+        public bool NegC => (_opcode & 0x2000000000000) != 0;
+        public bool Sat => (_opcode & 0x4000000000000) != 0;
+        public RoundMode RoundMode => (RoundMode)((_opcode >> 51) & 0x3);
+        public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+    }
+
+    struct InstFfma32i // Field accessors over one 64-bit opcode word kept in _opcode; exposes a 32-bit Imm32 value, and its flag bits sit at different positions than in the structs above.
+    {
+        private ulong _opcode;
+        public InstFfma32i(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Imm32 => (int)(_opcode >> 20); // the int cast keeps the low 32 bits of the shifted word, i.e. opcode bits 20..51 (assumes an unchecked context — TODO confirm project settings)
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegC => (_opcode & 0x200000000000000) != 0; // bit 57
+        public bool NegA => (_opcode & 0x100000000000000) != 0; // bit 56
+        public bool Sat => (_opcode & 0x80000000000000) != 0; // bit 55
+        public bool WriteCC => (_opcode & 0x10000000000000) != 0; // bit 52
+        public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+    }
+
+    struct InstFloR // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest and SrcB plus the Signed/Sh/NegB flags.
+    {
+        private ulong _opcode;
+        public InstFloR(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcB =>
(int)((_opcode >> 20) & 0xFF); // right-hand side of the SrcB property begun on the previous line (struct InstFloR)
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Signed => (_opcode & 0x1000000000000) != 0;
+        public bool Sh => (_opcode & 0x20000000000) != 0;
+        public bool NegB => (_opcode & 0x10000000000) != 0;
+    }
+
+    struct InstFloI // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest and a 20-bit Imm20 value plus the Signed/Sh/NegB flags.
+    {
+        private ulong _opcode;
+        public InstFloI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); // opcode bits 20..38 become result bits 0..18; opcode bit 56 becomes result bit 19 (no sign extension is applied here)
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Signed => (_opcode & 0x1000000000000) != 0;
+        public bool Sh => (_opcode & 0x20000000000) != 0;
+        public bool NegB => (_opcode & 0x10000000000) != 0;
+    }
+
+    struct InstFloC // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest and CbufSlot/CbufOffset plus the Signed/Sh/NegB flags.
+    {
+        private ulong _opcode;
+        public InstFloC(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Signed => (_opcode & 0x1000000000000) != 0;
+        public bool Sh => (_opcode & 0x20000000000) != 0;
+        public bool NegB => (_opcode & 0x10000000000) != 0;
+    }
+
+    struct InstFmnmxR // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcB, abs/neg flags for A and B, and a SrcPred/SrcPredInv pair.
+    {
+        private ulong _opcode;
+        public InstFmnmxR(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool AbsB => (_opcode & 0x2000000000000)
!= 0; // end of the AbsB property begun on the previous line (struct InstFmnmxR)
+        public bool NegA => (_opcode & 0x1000000000000) != 0;
+        public bool AbsA => (_opcode & 0x400000000000) != 0;
+        public bool NegB => (_opcode & 0x200000000000) != 0;
+        public bool Ftz => (_opcode & 0x100000000000) != 0;
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+    }
+
+    struct InstFmnmxI // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA, a 20-bit Imm20 value, abs/neg flags and a SrcPred/SrcPredInv pair.
+    {
+        private ulong _opcode;
+        public InstFmnmxI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); // opcode bits 20..38 become result bits 0..18; opcode bit 56 becomes result bit 19 (no sign extension is applied here)
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool AbsB => (_opcode & 0x2000000000000) != 0;
+        public bool NegA => (_opcode & 0x1000000000000) != 0;
+        public bool AbsA => (_opcode & 0x400000000000) != 0;
+        public bool NegB => (_opcode & 0x200000000000) != 0;
+        public bool Ftz => (_opcode & 0x100000000000) != 0;
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+    }
+
+    struct InstFmnmxC // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA, CbufSlot/CbufOffset, abs/neg flags and a SrcPred/SrcPredInv pair.
+    {
+        private ulong _opcode;
+        public InstFmnmxC(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool AbsB => (_opcode & 0x2000000000000) != 0;
+        public bool NegA => (_opcode & 0x1000000000000) != 0;
+        public bool AbsA => (_opcode & 0x400000000000) != 0;
+        public bool NegB => (_opcode & 0x200000000000) != 0;
+        public bool Ftz => (_opcode & 0x100000000000) != 0;
+        public int SrcPred => (int)((_opcode >>
39) & 0x7); // end of the SrcPred property begun on the previous line (struct InstFmnmxC)
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+    }
+
+    struct InstFmulR // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcB plus RoundMode, Fmz and Scale selector fields.
+    {
+        private ulong _opcode;
+        public InstFmulR(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); // 2-bit field at opcode bits 39..40
+        public Fmz Fmz => (Fmz)((_opcode >> 44) & 0x3); // 2-bit field at opcode bits 44..45
+        public MultiplyScale Scale => (MultiplyScale)((_opcode >> 41) & 0x7); // 3-bit field at opcode bits 41..43
+        public bool NegA => (_opcode & 0x1000000000000) != 0;
+        public bool Sat => (_opcode & 0x4000000000000) != 0;
+    }
+
+    struct InstFmulI // Field accessors over one 64-bit opcode word kept in _opcode; same fields as InstFmulR except that Imm20 replaces SrcB.
+    {
+        private ulong _opcode;
+        public InstFmulI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); // opcode bits 20..38 become result bits 0..18; opcode bit 56 becomes result bit 19 (no sign extension is applied here)
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); // 2-bit field at opcode bits 39..40
+        public Fmz Fmz => (Fmz)((_opcode >> 44) & 0x3); // 2-bit field at opcode bits 44..45
+        public MultiplyScale Scale => (MultiplyScale)((_opcode >> 41) & 0x7); // 3-bit field at opcode bits 41..43
+        public bool NegA => (_opcode & 0x1000000000000) != 0;
+        public bool Sat => (_opcode & 0x4000000000000) != 0;
+    }
+
+    struct InstFmulC // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and CbufSlot/CbufOffset (remaining members follow on the next line).
+    {
+        private ulong _opcode;
+        public InstFmulC(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool
WriteCC => (_opcode & 0x800000000000) != 0; // rest of a property begun on the previous line; this and the members below belong to struct InstFmulC
+        public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+        public Fmz Fmz => (Fmz)((_opcode >> 44) & 0x3);
+        public MultiplyScale Scale => (MultiplyScale)((_opcode >> 41) & 0x7);
+        public bool NegA => (_opcode & 0x1000000000000) != 0;
+        public bool Sat => (_opcode & 0x4000000000000) != 0;
+    }
+
+    struct InstFmul32i // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and a 32-bit Imm32 value.
+    {
+        private ulong _opcode;
+        public InstFmul32i(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int Imm32 => (int)(_opcode >> 20); // the int cast keeps the low 32 bits of the shifted word, i.e. opcode bits 20..51 (assumes an unchecked context — TODO confirm project settings)
+        public bool Sat => (_opcode & 0x80000000000000) != 0; // bit 55
+        public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+        public bool WriteCC => (_opcode & 0x10000000000000) != 0; // bit 52
+    }
+
+    struct InstFsetR // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcB, an FComp selector, a BoolOp selector and a SrcPred/SrcPredInv pair.
+    {
+        private ulong _opcode;
+        public InstFsetR(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool NegB => (_opcode & 0x20000000000000) != 0;
+        public bool AbsA => (_opcode & 0x40000000000000) != 0;
+        public bool AbsB => (_opcode & 0x100000000000) != 0;
+        public FComp FComp => (FComp)((_opcode >> 48) & 0xF); // 4-bit field at opcode bits 48..51
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+        public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); // 2-bit field at opcode bits 45..46
+        public bool Ftz => (_opcode & 0x80000000000000) != 0;
+        public bool BVal => (_opcode & 0x10000000000000) != 0;
+    }
+
+    struct InstFsetC // Field accessors over one 64-bit opcode word kept in _opcode; same fields as InstFsetR except that CbufSlot/CbufOffset replace SrcB.
+    {
+        private ulong _opcode;
+        public InstFsetC(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool NegB => (_opcode & 0x20000000000000) != 0;
+        public bool AbsA => (_opcode & 0x40000000000000) != 0;
+        public bool AbsB => (_opcode & 0x100000000000) != 0;
+        public FComp FComp => (FComp)((_opcode >> 48) & 0xF); // 4-bit field at opcode bits 48..51
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+        public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); // 2-bit field at opcode bits 45..46
+        public bool Ftz => (_opcode & 0x80000000000000) != 0;
+        public bool BVal => (_opcode & 0x10000000000000) != 0;
+    }
+
+    struct InstFsetI // Field accessors over one 64-bit opcode word kept in _opcode; same fields as InstFsetR except that a 20-bit Imm20 replaces SrcB.
+    {
+        private ulong _opcode;
+        public InstFsetI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); // opcode bits 20..38 become result bits 0..18; opcode bit 56 becomes result bit 19 (no sign extension is applied here)
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool NegB => (_opcode & 0x20000000000000) != 0;
+        public bool AbsA => (_opcode & 0x40000000000000) != 0;
+        public bool AbsB => (_opcode & 0x100000000000) != 0;
+        public FComp FComp => (FComp)((_opcode >> 48) & 0xF); // 4-bit field at opcode bits 48..51
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+        public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); // 2-bit field at opcode bits 45..46
+        public bool Ftz => (_opcode & 0x80000000000000) != 0;
+        public bool BVal => (_opcode & 0x10000000000000) != 0;
+    }
+
+    struct InstFsetpR // Field accessors over one 64-bit opcode word kept in _opcode (members follow on the next line).
+    {
+        private ulong _opcode;
+        public
InstFsetpR(ulong opcode) => _opcode = opcode; // constructor of struct InstFsetpR, whose header is on the previous line; it only stores the raw opcode word
+        public int DestPred => (int)((_opcode >> 3) & 0x7); // 3-bit field at opcode bits 3..5
+        public int DestPredInv => (int)((_opcode >> 0) & 0x7); // 3-bit field at opcode bits 0..2 (an index, not a flag)
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool NegB => (_opcode & 0x40) != 0;
+        public bool AbsA => (_opcode & 0x80) != 0;
+        public bool AbsB => (_opcode & 0x100000000000) != 0;
+        public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+        public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+        public bool Ftz => (_opcode & 0x800000000000) != 0; // NOTE(review): same mask (bit 47) as WriteCC above, so the two always agree — confirm both are meant to read this bit
+    }
+
+    struct InstFsetpI // Field accessors over one 64-bit opcode word kept in _opcode; same fields as InstFsetpR except that a 20-bit Imm20 replaces SrcB.
+    {
+        private ulong _opcode;
+        public InstFsetpI(ulong opcode) => _opcode = opcode;
+        public int DestPred => (int)((_opcode >> 3) & 0x7); // 3-bit field at opcode bits 3..5
+        public int DestPredInv => (int)((_opcode >> 0) & 0x7); // 3-bit field at opcode bits 0..2 (an index, not a flag)
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); // opcode bits 20..38 become result bits 0..18; opcode bit 56 becomes result bit 19 (no sign extension is applied here)
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool NegB => (_opcode & 0x40) != 0;
+        public bool AbsA => (_opcode & 0x80) != 0;
+        public bool AbsB => (_opcode & 0x100000000000) != 0;
+        public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+        public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+        public bool Ftz => (_opcode & 0x800000000000) != 0; // NOTE(review): same mask (bit 47) as WriteCC above, so the two always agree — confirm both are meant to read this bit
+    }
+
+    struct InstFsetpC // Field accessors over one 64-bit opcode word kept in _opcode (members follow on the next line).
+    {
+        private ulong _opcode;
+        public InstFsetpC(ulong opcode) =>
_opcode = opcode; // body of the InstFsetpC constructor begun on the previous line; the members below belong to struct InstFsetpC
+        public int DestPred => (int)((_opcode >> 3) & 0x7); // 3-bit field at opcode bits 3..5
+        public int DestPredInv => (int)((_opcode >> 0) & 0x7); // 3-bit field at opcode bits 0..2 (an index, not a flag)
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool NegB => (_opcode & 0x40) != 0;
+        public bool AbsA => (_opcode & 0x80) != 0;
+        public bool AbsB => (_opcode & 0x100000000000) != 0;
+        public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+        public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+        public bool Ftz => (_opcode & 0x800000000000) != 0; // NOTE(review): same mask (bit 47) as WriteCC above, so the two always agree — confirm both are meant to read this bit
+    }
+
+    struct InstFswzadd // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcB plus an 8-bit PnWord field and the Ftz/Ndv flags.
+    {
+        private ulong _opcode;
+        public InstFswzadd(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Ftz => (_opcode & 0x100000000000) != 0;
+        public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+        public bool Ndv => (_opcode & 0x4000000000) != 0; // bit 38
+        public int PnWord => (int)((_opcode >> 28) & 0xFF); // 8-bit field at opcode bits 28..35, directly above SrcB (bits 20..27)
+    }
+
+    struct InstGetcrsptr // Field accessors over one 64-bit opcode word kept in _opcode; only the 8-bit Dest field is exposed.
+    {
+        private ulong _opcode;
+        public InstGetcrsptr(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+    }
+
+    struct InstGetlmembase // Field accessors over one 64-bit opcode word kept in _opcode; only the 8-bit Dest field is exposed.
+    {
+        private ulong _opcode;
+        public InstGetlmembase(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+    }
+
+    struct InstHadd2R // Field accessors over one 64-bit opcode word kept in _opcode (members follow on the next line).
+    {
+        private ulong _opcode;
+        public
InstHadd2R(ulong opcode) => _opcode = opcode; // constructor of struct InstHadd2R, whose header is on the previous line; it only stores the raw opcode word
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3); // 2-bit field at opcode bits 49..50
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); // 2-bit field at opcode bits 47..48
+        public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3); // 2-bit field at opcode bits 28..29
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool NegB => (_opcode & 0x80000000) != 0;
+        public bool AbsA => (_opcode & 0x100000000000) != 0;
+        public bool AbsB => (_opcode & 0x40000000) != 0;
+        public bool Sat => (_opcode & 0x100000000) != 0;
+        public bool Ftz => (_opcode & 0x8000000000) != 0;
+    }
+
+    struct InstHadd2I // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and two 10-bit values, BimmH0 and BimmH1.
+    {
+        private ulong _opcode;
+        public InstHadd2I(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int BimmH0 => (int)((_opcode >> 20) & 0x3FF); // 10-bit field at opcode bits 20..29
+        public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF); // opcode bits 30..38 become result bits 0..8; opcode bit 56 becomes result bit 9
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool AbsA => (_opcode & 0x100000000000) != 0;
+        public bool Sat => (_opcode & 0x10000000000000) != 0; // bit 52 here, whereas InstHadd2R reads Sat from bit 32
+        public bool Ftz => (_opcode & 0x8000000000) != 0;
+    }
+
+    struct InstHadd2C // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and CbufSlot/CbufOffset (remaining members follow on the next line).
+    {
+        private ulong _opcode;
+        public InstHadd2C(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public
HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); // rest of a property begun on the previous line; this and the members below belong to struct InstHadd2C
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool NegB => (_opcode & 0x100000000000000) != 0;
+        public bool AbsA => (_opcode & 0x100000000000) != 0;
+        public bool AbsB => (_opcode & 0x40000000000000) != 0;
+        public bool Sat => (_opcode & 0x10000000000000) != 0;
+        public bool Ftz => (_opcode & 0x8000000000) != 0;
+    }
+
+    struct InstHadd232i // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and a 32-bit Imm value.
+    {
+        private ulong _opcode;
+        public InstHadd232i(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Imm => (int)(_opcode >> 20); // the int cast keeps the low 32 bits of the shifted word, i.e. opcode bits 20..51 (assumes an unchecked context — TODO confirm project settings)
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3); // 2-bit field at opcode bits 53..54
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x100000000000000) != 0; // bit 56
+        public bool Sat => (_opcode & 0x10000000000000) != 0; // bit 52
+        public bool Ftz => (_opcode & 0x80000000000000) != 0; // bit 55
+    }
+
+    struct InstHfma2R // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcB/SrcC with one HalfSwizzle selector per source.
+    {
+        private ulong _opcode;
+        public InstHfma2R(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); // 2-bit field at opcode bits 47..48
+        public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3); // 2-bit field at opcode bits 28..29
+        public HalfSwizzle CSwizzle => (HalfSwizzle)((_opcode >> 35) & 0x3); // 2-bit field at opcode bits 35..36
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000) != 0;
+        public bool NegC => (_opcode & 0x40000000) != 0;
+        public bool Sat => (_opcode & 0x100000000) != 0;
+        public Fmz Fmz => (Fmz)((_opcode >> 37) & 0x3); // 2-bit field at opcode bits 37..38
+    }
+
+    struct
InstHfma2I // Name of the struct whose keyword ends the previous line. Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcC and two 10-bit values, BimmH0 and BimmH1.
+    {
+        private ulong _opcode;
+        public InstHfma2I(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int BimmH0 => (int)((_opcode >> 20) & 0x3FF); // 10-bit field at opcode bits 20..29
+        public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF); // opcode bits 30..38 become result bits 0..8; opcode bit 56 becomes result bit 9
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public HalfSwizzle CSwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3); // 2-bit field at opcode bits 53..54
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegC => (_opcode & 0x8000000000000) != 0; // bit 51
+        public bool Sat => (_opcode & 0x10000000000000) != 0; // bit 52
+        public Fmz Fmz => (Fmz)((_opcode >> 57) & 0x3); // 2-bit field at opcode bits 57..58
+    }
+
+    struct InstHfma2C // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcC and CbufSlot/CbufOffset.
+    {
+        private ulong _opcode;
+        public InstHfma2C(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public HalfSwizzle CSwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3); // 2-bit field at opcode bits 53..54
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x100000000000000) != 0; // bit 56
+        public bool NegC => (_opcode & 0x8000000000000) != 0; // bit 51
+        public bool Sat => (_opcode & 0x10000000000000) != 0; // bit 52
+        public Fmz Fmz => (Fmz)((_opcode >> 57) & 0x3); // 2-bit field at opcode bits 57..58
+    }
+
+    struct InstHfma2Rc // Field accessors over one 64-bit opcode word kept in _opcode (remaining members follow on the next line).
+    {
+        private ulong _opcode;
+        public InstHfma2Rc(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot =>
(int)((_opcode >> 34) & 0x1F); // right-hand side of the CbufSlot property begun on the previous line (struct InstHfma2Rc): 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public HalfSwizzle CSwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x100000000000000) != 0;
+        public bool NegC => (_opcode & 0x8000000000000) != 0;
+        public bool Sat => (_opcode & 0x10000000000000) != 0;
+        public Fmz Fmz => (Fmz)((_opcode >> 57) & 0x3);
+    }
+
+    struct InstHfma232i // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and a 32-bit Imm value.
+    {
+        private ulong _opcode;
+        public InstHfma232i(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Imm => (int)(_opcode >> 20); // the int cast keeps the low 32 bits of the shifted word, i.e. opcode bits 20..51 (assumes an unchecked context — TODO confirm project settings)
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); // NOTE(review): bits 47..48 lie inside the bits 20..51 that Imm reads — confirm against the instruction encoding
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegC => (_opcode & 0x8000000000000) != 0; // NOTE(review): bit 51 is also the top bit that Imm reads — confirm against the instruction encoding
+        public Fmz Fmz => (Fmz)((_opcode >> 57) & 0x3); // 2-bit field at opcode bits 57..58
+    }
+
+    struct InstHmul2R // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcB with HalfSwizzle selectors for A and B.
+    {
+        private ulong _opcode;
+        public InstHmul2R(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000) != 0; // bit 31 here, whereas InstHmul2I and InstHmul2C read NegA from bit 43
+        public bool AbsA => (_opcode & 0x100000000000) != 0;
+        public bool AbsB => (_opcode & 0x40000000) != 0;
+        public bool Sat => (_opcode & 0x100000000) != 0;
+        public Fmz Fmz =>
(Fmz)((_opcode >> 39) & 0x3); // right-hand side of the Fmz property begun on the previous line (struct InstHmul2R): 2-bit field at opcode bits 39..40
+    }
+
+    struct InstHmul2I // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and two 10-bit values, BimmH0 and BimmH1.
+    {
+        private ulong _opcode;
+        public InstHmul2I(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int BimmH0 => (int)((_opcode >> 20) & 0x3FF); // 10-bit field at opcode bits 20..29
+        public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF); // opcode bits 30..38 become result bits 0..8; opcode bit 56 becomes result bit 9
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool AbsA => (_opcode & 0x100000000000) != 0;
+        public bool Sat => (_opcode & 0x10000000000000) != 0;
+        public Fmz Fmz => (Fmz)((_opcode >> 39) & 0x3); // 2-bit field at opcode bits 39..40
+    }
+
+    struct InstHmul2C // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and CbufSlot/CbufOffset.
+    {
+        private ulong _opcode;
+        public InstHmul2C(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F); // 5-bit field at opcode bits 34..38
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); // 14-bit field at opcode bits 20..33
+        public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool AbsA => (_opcode & 0x100000000000) != 0;
+        public bool AbsB => (_opcode & 0x40000000000000) != 0;
+        public bool Sat => (_opcode & 0x10000000000000) != 0;
+        public Fmz Fmz => (Fmz)((_opcode >> 39) & 0x3); // 2-bit field at opcode bits 39..40
+    }
+
+    struct InstHmul232i // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and a 32-bit Imm32 value (remaining members follow on the next line).
+    {
+        private ulong _opcode;
+        public InstHmul232i(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Imm32 => (int)(_opcode >> 20); // the int cast keeps the low 32 bits of the shifted word, i.e. opcode bits 20..51 (assumes an unchecked context — TODO confirm project settings)
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3); // 2-bit field at opcode bits 53..54
+        public int
Pred => (int)((_opcode >> 16) & 0x7); // rest of a property begun on the previous line; this and the members below belong to struct InstHmul232i
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool Sat => (_opcode & 0x10000000000000) != 0;
+        public Fmz Fmz => (Fmz)((_opcode >> 55) & 0x3); // 2-bit field at opcode bits 55..56
+    }
+
+    struct InstHset2R // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA/SrcB, an FComp selector (Cmp), a BoolOp selector and a SrcPred/SrcPredInv pair.
+    {
+        private ulong _opcode;
+        public InstHset2R(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool AbsA => (_opcode & 0x100000000000) != 0;
+        public bool NegB => (_opcode & 0x80000000) != 0;
+        public bool AbsB => (_opcode & 0x40000000) != 0;
+        public bool Bval => (_opcode & 0x2000000000000) != 0; // bit 49
+        public FComp Cmp => (FComp)((_opcode >> 35) & 0xF); // 4-bit field at opcode bits 35..38
+        public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+        public bool Ftz => (_opcode & 0x4000000000000) != 0; // bit 50
+    }
+
+    struct InstHset2I // Field accessors over one 64-bit opcode word kept in _opcode; exposes Dest/SrcA and two 10-bit values, BimmH0 and BimmH1 (remaining members follow on the next line).
+    {
+        private ulong _opcode;
+        public InstHset2I(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int BimmH0 => (int)((_opcode >> 20) & 0x3FF); // 10-bit field at opcode bits 20..29
+        public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF); // opcode bits 30..38 become result bits 0..8; opcode bit 56 becomes result bit 9
+        public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool NegA => (_opcode & 0x80000000000) != 0;
+        public bool AbsA => (_opcode & 0x100000000000) != 0;
+        public bool Bval => (_opcode & 0x20000000000000) != 0; // bit 53
+        public FComp Cmp => (FComp)((_opcode >> 49) & 0xF); // 4-bit field at opcode bits 49..52
+
public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public bool Ftz => (_opcode & 0x40000000000000) != 0; + } + + struct InstHset2C + { + private ulong _opcode; + public InstHset2C(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 0; + public bool AbsA => (_opcode & 0x100000000000) != 0; + public bool NegB => (_opcode & 0x100000000000000) != 0; + public bool Bval => (_opcode & 0x20000000000000) != 0; + public FComp Cmp => (FComp)((_opcode >> 49) & 0xF); + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public bool Ftz => (_opcode & 0x40000000000000) != 0; + } + + struct InstHsetp2R + { + private ulong _opcode; + public InstHsetp2R(ulong opcode) => _opcode = opcode; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int DestPredInv => (int)((_opcode >> 0) & 0x7); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 0; + public bool NegB => (_opcode & 0x80000000) != 0; + public bool AbsA => (_opcode & 0x100000000000) != 0; + public bool AbsB => (_opcode & 0x40000000) != 0; + public FComp FComp2 => (FComp)((_opcode >> 35) & 0xF); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool 
SrcPredInv => (_opcode & 0x40000000000) != 0; + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool Ftz => (_opcode & 0x40) != 0; + public bool HAnd => (_opcode & 0x2000000000000) != 0; + public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); + public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3); + } + + struct InstHsetp2I + { + private ulong _opcode; + public InstHsetp2I(ulong opcode) => _opcode = opcode; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int DestPredInv => (int)((_opcode >> 0) & 0x7); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int BimmH0 => (int)((_opcode >> 20) & 0x3FF); + public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF); + public bool NegA => (_opcode & 0x80000000000) != 0; + public bool AbsA => (_opcode & 0x100000000000) != 0; + public FComp FComp => (FComp)((_opcode >> 49) & 0xF); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool Ftz => (_opcode & 0x40) != 0; + public bool HAnd => (_opcode & 0x20000000000000) != 0; + public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); + } + + struct InstHsetp2C + { + private ulong _opcode; + public InstHsetp2C(ulong opcode) => _opcode = opcode; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int DestPredInv => (int)((_opcode >> 0) & 0x7); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool NegA => (_opcode & 0x80000000000) != 0; + public bool NegB => (_opcode & 0x100000000000000) != 0; + public bool AbsA => 
(_opcode & 0x100000000000) != 0; + public bool AbsB => (_opcode & 0x40000000000000) != 0; + public FComp FComp => (FComp)((_opcode >> 49) & 0xF); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3); + public bool Ftz => (_opcode & 0x40) != 0; + public bool HAnd => (_opcode & 0x20000000000000) != 0; + public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3); + } + + struct InstI2fR + { + private ulong _opcode; + public InstI2fR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3); + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + public ISrcFmt ISrcFmt => (ISrcFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3)); + public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3); + } + + struct InstI2fI + { + private ulong _opcode; + public InstI2fI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3); + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + public ISrcFmt ISrcFmt => (ISrcFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 
0x3)); + public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3); + } + + struct InstI2fC + { + private ulong _opcode; + public InstI2fC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3); + public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3); + public ISrcFmt ISrcFmt => (ISrcFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3)); + public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3); + } + + struct InstI2iR + { + private ulong _opcode; + public InstI2iR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3); + public ISrcDstFmt IDstFmt => (ISrcDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3)); + public ISrcDstFmt ISrcFmt => (ISrcDstFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3)); + } + + struct InstI2iI + { + private ulong _opcode; + public InstI2iI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 
0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3); + public ISrcDstFmt IDstFmt => (ISrcDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3)); + public ISrcDstFmt ISrcFmt => (ISrcDstFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3)); + } + + struct InstI2iC + { + private ulong _opcode; + public InstI2iC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool AbsB => (_opcode & 0x2000000000000) != 0; + public bool NegB => (_opcode & 0x200000000000) != 0; + public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3); + public ISrcDstFmt IDstFmt => (ISrcDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3)); + public ISrcDstFmt ISrcFmt => (ISrcDstFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3)); + } + + struct InstIaddR + { + private ulong _opcode; + public InstIaddR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3); + public bool X => (_opcode & 
0x80000000000) != 0; + } + + struct InstIaddI + { + private ulong _opcode; + public InstIaddI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3); + public bool X => (_opcode & 0x80000000000) != 0; + } + + struct InstIaddC + { + private ulong _opcode; + public InstIaddC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Sat => (_opcode & 0x4000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3); + public bool X => (_opcode & 0x80000000000) != 0; + } + + struct InstIadd32i + { + private ulong _opcode; + public InstIadd32i(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm32 => (int)(_opcode >> 20); + public AvgMode AvgMode => (AvgMode)((_opcode >> 55) & 0x3); + public bool Sat => (_opcode & 0x40000000000000) != 0; + public bool WriteCC => (_opcode & 0x10000000000000) != 0; + public bool X => (_opcode & 0x20000000000000) != 0; + } + + struct InstIadd3R + { + private ulong _opcode; + public InstIadd3R(ulong opcode) => _opcode = 
opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool NegA => (_opcode & 0x8000000000000) != 0; + public bool NegB => (_opcode & 0x4000000000000) != 0; + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool X => (_opcode & 0x1000000000000) != 0; + public Lrs Lrs => (Lrs)((_opcode >> 37) & 0x3); + public HalfSelect Apart => (HalfSelect)((_opcode >> 35) & 0x3); + public HalfSelect Bpart => (HalfSelect)((_opcode >> 33) & 0x3); + public HalfSelect Cpart => (HalfSelect)((_opcode >> 31) & 0x3); + } + + struct InstIadd3I + { + private ulong _opcode; + public InstIadd3I(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool NegA => (_opcode & 0x8000000000000) != 0; + public bool NegB => (_opcode & 0x4000000000000) != 0; + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool X => (_opcode & 0x1000000000000) != 0; + } + + struct InstIadd3C + { + private ulong _opcode; + public InstIadd3C(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 
16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool NegA => (_opcode & 0x8000000000000) != 0; + public bool NegB => (_opcode & 0x4000000000000) != 0; + public bool NegC => (_opcode & 0x2000000000000) != 0; + public bool X => (_opcode & 0x1000000000000) != 0; + } + + struct InstIcmpR + { + private ulong _opcode; + public InstIcmpR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public IComp IComp => (IComp)((_opcode >> 49) & 0x7); + public bool Signed => (_opcode & 0x1000000000000) != 0; + } + + struct InstIcmpI + { + private ulong _opcode; + public InstIcmpI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public IComp IComp => (IComp)((_opcode >> 49) & 0x7); + public bool Signed => (_opcode & 0x1000000000000) != 0; + } + + struct InstIcmpC + { + private ulong _opcode; + public InstIcmpC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public IComp IComp => (IComp)((_opcode >> 49) & 0x7); + public bool Signed 
=> (_opcode & 0x1000000000000) != 0; + } + + struct InstIcmpRc + { + private ulong _opcode; + public InstIcmpRc(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public IComp IComp => (IComp)((_opcode >> 49) & 0x7); + public bool Signed => (_opcode & 0x1000000000000) != 0; + } + + struct InstIde + { + private ulong _opcode; + public InstIde(ulong opcode) => _opcode = opcode; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); + public bool Di => (_opcode & 0x20) != 0; + } + + struct InstIdpR + { + private ulong _opcode; + public InstIdpR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool IsHi => (_opcode & 0x4000000000000) != 0; + public bool SrcASign => (_opcode & 0x2000000000000) != 0; + public bool IsDp => (_opcode & 0x1000000000000) != 0; + public bool SrcBSign => (_opcode & 0x800000000000) != 0; + } + + struct InstIdpC + { + private ulong _opcode; + public InstIdpC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool IsHi => (_opcode & 
0x4000000000000) != 0; + public bool SrcASign => (_opcode & 0x2000000000000) != 0; + public bool IsDp => (_opcode & 0x1000000000000) != 0; + public bool SrcBSign => (_opcode & 0x800000000000) != 0; + } + + struct InstImadR + { + private ulong _opcode; + public InstImadR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Hilo => (_opcode & 0x40000000000000) != 0; + public bool BSigned => (_opcode & 0x20000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 51) & 0x3); + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool X => (_opcode & 0x2000000000000) != 0; + public bool ASigned => (_opcode & 0x1000000000000) != 0; + } + + struct InstImadI + { + private ulong _opcode; + public InstImadI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Hilo => (_opcode & 0x40000000000000) != 0; + public bool BSigned => (_opcode & 0x20000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 51) & 0x3); + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool X => (_opcode & 0x2000000000000) != 0; + public bool ASigned => (_opcode & 0x1000000000000) != 0; + } + + struct InstImadC + { + private ulong _opcode; + public InstImadC(ulong opcode) => _opcode = opcode; + public int 
Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Hilo => (_opcode & 0x40000000000000) != 0; + public bool BSigned => (_opcode & 0x20000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 51) & 0x3); + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool X => (_opcode & 0x2000000000000) != 0; + public bool ASigned => (_opcode & 0x1000000000000) != 0; + } + + struct InstImadRc + { + private ulong _opcode; + public InstImadRc(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Hilo => (_opcode & 0x40000000000000) != 0; + public bool BSigned => (_opcode & 0x20000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 51) & 0x3); + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool X => (_opcode & 0x2000000000000) != 0; + public bool ASigned => (_opcode & 0x1000000000000) != 0; + } + + struct InstImad32i + { + private ulong _opcode; + public InstImad32i(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm32 => 
(int)(_opcode >> 20); + public bool BSigned => (_opcode & 0x200000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 55) & 0x3); + public bool ASigned => (_opcode & 0x40000000000000) != 0; + public bool WriteCC => (_opcode & 0x10000000000000) != 0; + public bool Hilo => (_opcode & 0x20000000000000) != 0; + } + + struct InstImadspR + { + private ulong _opcode; + public InstImadspR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public ImadspASelect ASelect => (ImadspASelect)((_opcode >> 48) & 0x7); + public ImadspBSelect BSelect => (ImadspBSelect)((_opcode >> 53) & 0x3); + public ImadspASelect CSelect => (ImadspASelect)((int)((_opcode >> 50) & 0x6) | (int)((_opcode >> 48) & 0x1)); + } + + struct InstImadspI + { + private ulong _opcode; + public InstImadspI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public ImadspASelect ASelect => (ImadspASelect)((_opcode >> 48) & 0x7); + public ImadspBSelect BSelect => (ImadspBSelect)((_opcode >> 53) & 0x3); + public ImadspASelect CSelect => (ImadspASelect)((int)((_opcode >> 50) & 0x6) | (int)((_opcode >> 48) & 0x1)); + } + + struct InstImadspC + { + private ulong _opcode; + public InstImadspC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public 
int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public ImadspASelect ASelect => (ImadspASelect)((_opcode >> 48) & 0x7); + public ImadspBSelect BSelect => (ImadspBSelect)((_opcode >> 53) & 0x3); + public ImadspASelect CSelect => (ImadspASelect)((int)((_opcode >> 50) & 0x6) | (int)((_opcode >> 48) & 0x1)); + } + + struct InstImadspRc + { + private ulong _opcode; + public InstImadspRc(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public ImadspASelect ASelect => (ImadspASelect)((_opcode >> 48) & 0x7); + public ImadspBSelect BSelect => (ImadspBSelect)((_opcode >> 53) & 0x3); + public ImadspASelect CSelect => (ImadspASelect)((int)((_opcode >> 50) & 0x6) | (int)((_opcode >> 48) & 0x1)); + } + + struct InstImnmxR + { + private ulong _opcode; + public InstImnmxR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Signed => (_opcode & 0x1000000000000) != 0; + public XMode XMode => (XMode)((_opcode >> 43) & 0x3); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstImnmxI + { + private ulong _opcode; + public InstImnmxI(ulong opcode) => _opcode = opcode; + public int Dest => 
(int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Signed => (_opcode & 0x1000000000000) != 0; + public XMode XMode => (XMode)((_opcode >> 43) & 0x3); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstImnmxC + { + private ulong _opcode; + public InstImnmxC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Signed => (_opcode & 0x1000000000000) != 0; + public XMode XMode => (XMode)((_opcode >> 43) & 0x3); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + } + + struct InstImulR + { + private ulong _opcode; + public InstImulR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool ASigned => (_opcode & 0x10000000000) != 0; + public bool BSigned => (_opcode & 0x20000000000) != 0; + public bool Hilo => (_opcode & 0x8000000000) != 0; + } + + struct InstImulI + { + private ulong _opcode; + public InstImulI(ulong opcode) => _opcode = opcode; + public int Dest => 
(int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool ASigned => (_opcode & 0x10000000000) != 0; + public bool BSigned => (_opcode & 0x20000000000) != 0; + public bool Hilo => (_opcode & 0x8000000000) != 0; + } + + struct InstImulC + { + private ulong _opcode; + public InstImulC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool ASigned => (_opcode & 0x10000000000) != 0; + public bool BSigned => (_opcode & 0x20000000000) != 0; + public bool Hilo => (_opcode & 0x8000000000) != 0; + } + + struct InstImul32i + { + private ulong _opcode; + public InstImul32i(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm32 => (int)(_opcode >> 20); + public bool ASigned => (_opcode & 0x40000000000000) != 0; + public bool BSigned => (_opcode & 0x80000000000000) != 0; + public bool Hilo => (_opcode & 0x20000000000000) != 0; + public bool WriteCC => (_opcode & 0x10000000000000) != 0; + } + + struct InstIpa + { + private ulong _opcode; + public InstIpa(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 
0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public IpaOp IpaOp => (IpaOp)((_opcode >> 54) & 0x3); + public int Msi => (int)((_opcode >> 52) & 0x3); + public bool Sat => (_opcode & 0x8000000000000) != 0; + public bool Idx => (_opcode & 0x4000000000) != 0; + public int Imm10 => (int)((_opcode >> 28) & 0x3FF); + public int SrcPred => (int)((_opcode >> 47) & 0x7); + public bool SrcPredInv => (_opcode & 0x4000000000000) != 0; + } + + struct InstIsberd + { + private ulong _opcode; + public InstIsberd(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public AlSize AlSize => (AlSize)((_opcode >> 47) & 0x3); + public IBase IBase => (IBase)((_opcode >> 33) & 0x3); + public bool O => (_opcode & 0x100000000) != 0; + public bool P => (_opcode & 0x80000000) != 0; + } + + struct InstIscaddR + { + private ulong _opcode; + public InstIscaddR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public int Imm5 => (int)((_opcode >> 39) & 0x1F); + public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3); + } + + struct InstIscaddI + { + private ulong _opcode; + public InstIscaddI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode 
& 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public int Imm5 => (int)((_opcode >> 39) & 0x1F); + public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3); + } + + struct InstIscaddC + { + private ulong _opcode; + public InstIscaddC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public int Imm5 => (int)((_opcode >> 39) & 0x1F); + public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3); + } + + struct InstIscadd32i + { + private ulong _opcode; + public InstIscadd32i(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm32 => (int)(_opcode >> 20); + public bool WriteCC => (_opcode & 0x10000000000000) != 0; + public int Imm5 => (int)((_opcode >> 53) & 0x1F); + } + + struct InstIsetR + { + private ulong _opcode; + public InstIsetR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public IComp IComp => (IComp)((_opcode >> 49) & 0x7); + public bool Signed => (_opcode & 0x1000000000000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public bool BVal => (_opcode & 0x100000000000) != 0; + public BoolOp Bop => 
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstIsetI
{
    private readonly ulong _opcode;

    public InstIsetI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);            // bits 0-7
    public int SrcA => (int)((_opcode >> 8) & 0xFF);            // bits 8-15

    // Opcode bit 56 becomes result bit 19; opcode bits 20-38 become result bits 0-18.
    // The value is not sign-extended here.
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);

    public int Pred => (int)((_opcode >> 16) & 0x7);            // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;            // bit 19
    public bool WriteCC => (_opcode & 0x800000000000) != 0;     // bit 47
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);       // bits 49-51
    public bool Signed => (_opcode & 0x1000000000000) != 0;     // bit 48
    public int SrcPred => (int)((_opcode >> 39) & 0x7);         // bits 39-41
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;   // bit 42
    public bool BVal => (_opcode & 0x100000000000) != 0;        // bit 44
    public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);       // bits 45-46
    public bool X => (_opcode & 0x80000000000) != 0;            // bit 43
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstIsetpI
{
    private readonly ulong _opcode;

    public InstIsetpI(ulong opcode) => _opcode = opcode;

    public int SrcA => (int)((_opcode >> 8) & 0xFF);            // bits 8-15

    // Opcode bit 56 becomes result bit 19; opcode bits 20-38 become result bits 0-18.
    // The value is not sign-extended here.
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);

    public int Pred => (int)((_opcode >> 16) & 0x7);            // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;            // bit 19
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);       // bits 49-51
    public bool Signed => (_opcode & 0x1000000000000) != 0;     // bit 48
    public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);       // bits 45-46
    public bool X => (_opcode & 0x80000000000) != 0;            // bit 43
    public int SrcPred => (int)((_opcode >> 39) & 0x7);         // bits 39-41
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;   // bit 42
    public int DestPred => (int)((_opcode >> 3) & 0x7);         // bits 3-5
    public int DestPredInv => (int)((_opcode >> 0) & 0x7);      // bits 0-2; a 3-bit index, not a flag, despite the name
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstJcal
{
    private readonly ulong _opcode;

    public InstJcal(ulong opcode) => _opcode = opcode;

    // Bits 20-51. The cast deliberately drops everything above bit 51; `unchecked`
    // keeps that truncation working even if the project enables overflow checking.
    public int Imm32 => unchecked((int)(_opcode >> 20));

    public bool Ca => (_opcode & 0x20) != 0;    // bit 5
    public bool Inc => (_opcode & 0x40) != 0;   // bit 6
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstLdl
{
    private readonly ulong _opcode;

    public InstLdl(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);               // bits 0-7
    public int SrcA => (int)((_opcode >> 8) & 0xFF);               // bits 8-15
    public int Pred => (int)((_opcode >> 16) & 0x7);               // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;               // bit 19
    public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);     // bits 48-50
    public CacheOp2 CacheOp => (CacheOp2)((_opcode >> 44) & 0x3);  // bits 44-45
    public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);         // bits 20-43, zero-extended
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstLeaR
{
    private readonly ulong _opcode;

    public InstLeaR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);          // bits 0-7
    public int SrcA => (int)((_opcode >> 8) & 0xFF);          // bits 8-15
    public int SrcB => (int)((_opcode >> 20) & 0xFF);         // bits 20-27
    public int Pred => (int)((_opcode >> 16) & 0x7);          // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;          // bit 19
    public bool WriteCC => (_opcode & 0x800000000000) != 0;   // bit 47
    public bool X => (_opcode & 0x400000000000) != 0;         // bit 46
    public bool NegA => (_opcode & 0x200000000000) != 0;      // bit 45
    public int ImmU5 => (int)((_opcode >> 39) & 0x1F);        // bits 39-43
    public int DestPred => (int)((_opcode >> 48) & 0x7);      // bits 48-50
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstLeaHiR
{
    private readonly ulong _opcode;

    public InstLeaHiR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);          // bits 0-7
    public int SrcA => (int)((_opcode >> 8) & 0xFF);          // bits 8-15
    public int SrcB => (int)((_opcode >> 20) & 0xFF);         // bits 20-27
    public int SrcC => (int)((_opcode >> 39) & 0xFF);         // bits 39-46
    public int Pred => (int)((_opcode >> 16) & 0x7);          // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;          // bit 19
    public bool WriteCC => (_opcode & 0x800000000000) != 0;   // bit 47
    public bool X => (_opcode & 0x4000000000) != 0;           // bit 38
    public bool NegA => (_opcode & 0x2000000000) != 0;        // bit 37
    public int ImmU5 => (int)((_opcode >> 28) & 0x1F);        // bits 28-32
    public int DestPred => (int)((_opcode >> 48) & 0x7);      // bits 48-50
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstLopR
{
    private readonly ulong _opcode;

    public InstLopR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);                         // bits 0-7
    public int SrcA => (int)((_opcode >> 8) & 0xFF);                         // bits 8-15
    public int SrcB => (int)((_opcode >> 20) & 0xFF);                        // bits 20-27
    public int Pred => (int)((_opcode >> 16) & 0x7);                         // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;                         // bit 19
    public bool WriteCC => (_opcode & 0x800000000000) != 0;                  // bit 47
    public int DestPred => (int)((_opcode >> 48) & 0x7);                     // bits 48-50
    public PredicateOp PredicateOp => (PredicateOp)((_opcode >> 44) & 0x3);  // bits 44-45
    public bool X => (_opcode & 0x80000000000) != 0;                         // bit 43
    public LogicOp Lop => (LogicOp)((_opcode >> 41) & 0x3);                  // bits 41-42
    public bool NegA => (_opcode & 0x8000000000) != 0;                       // bit 39
    public bool NegB => (_opcode & 0x10000000000) != 0;                      // bit 40
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstLop3I
{
    private readonly ulong _opcode;

    public InstLop3I(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);          // bits 0-7
    public int SrcA => (int)((_opcode >> 8) & 0xFF);          // bits 8-15

    // Opcode bit 56 becomes result bit 19; opcode bits 20-38 become result bits 0-18.
    // The value is not sign-extended here.
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);

    public int SrcC => (int)((_opcode >> 39) & 0xFF);         // bits 39-46
    public int Pred => (int)((_opcode >> 16) & 0x7);          // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;          // bit 19
    public bool WriteCC => (_opcode & 0x800000000000) != 0;   // bit 47
    public bool X => (_opcode & 0x200000000000000) != 0;      // bit 57
    public int Imm => (int)((_opcode >> 48) & 0xFF);          // bits 48-55
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstMovR
{
    private readonly ulong _opcode;

    public InstMovR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);       // bits 0-7
    public int SrcA => (int)((_opcode >> 20) & 0xFF);      // bits 20-27 (not 8-15 as in most sibling structs)
    public int Pred => (int)((_opcode >> 16) & 0x7);       // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;       // bit 19
    public int QuadMask => (int)((_opcode >> 39) & 0xF);   // bits 39-42
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstMov32i
{
    private readonly ulong _opcode;

    public InstMov32i(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);       // bits 0-7

    // Bits 20-51. The cast deliberately drops everything above bit 51; `unchecked`
    // keeps that truncation working even if the project enables overflow checking.
    public int Imm32 => unchecked((int)(_opcode >> 20));

    public int Pred => (int)((_opcode >> 16) & 0x7);       // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;       // bit 19
    public int QuadMask => (int)((_opcode >> 12) & 0xF);   // bits 12-15
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstOutI
{
    private readonly ulong _opcode;

    public InstOutI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);              // bits 0-7
    public int SrcA => (int)((_opcode >> 8) & 0xFF);              // bits 8-15

    // Opcode bit 56 becomes result bit 19; opcode bits 20-38 become result bits 0-18.
    // The value is not sign-extended here.
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);

    public int Pred => (int)((_opcode >> 16) & 0x7);              // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;              // bit 19
    public OutType OutType => (OutType)((_opcode >> 39) & 0x3);   // bits 39-40
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstPbk
{
    private readonly ulong _opcode;

    public InstPbk(ulong opcode) => _opcode = opcode;

    public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);   // bits 20-43, zero-extended
    public bool Ca => (_opcode & 0x20) != 0;                 // bit 5
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstPopcI
{
    private readonly ulong _opcode;

    public InstPopcI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);       // bits 0-7

    // Opcode bit 56 becomes result bit 19; opcode bits 20-38 become result bits 0-18.
    // The value is not sign-extended here.
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);

    public int Pred => (int)((_opcode >> 16) & 0x7);       // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;       // bit 19
    public bool NegB => (_opcode & 0x10000000000) != 0;    // bit 40
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstPrmtC
{
    private readonly ulong _opcode;

    public InstPrmtC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);            // bits 0-7
    public int SrcA => (int)((_opcode >> 8) & 0xFF);            // bits 8-15
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);       // bits 34-38
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);   // bits 20-33
    public int SrcC => (int)((_opcode >> 39) & 0xFF);           // bits 39-46
    public int Pred => (int)((_opcode >> 16) & 0x7);            // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;            // bit 19
    public PMode PMode => (PMode)((_opcode >> 48) & 0xF);       // bits 48-51
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstR2b
{
    private readonly ulong _opcode;

    public InstR2b(ulong opcode) => _opcode = opcode;

    public int SrcB => (int)((_opcode >> 20) & 0xFF);          // bits 20-27
    public int Pred => (int)((_opcode >> 16) & 0x7);           // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;           // bit 19
    public BarMode Mode => (BarMode)((_opcode >> 32) & 0x3);   // bits 32-33
    public int Name => (int)((_opcode >> 28) & 0xF);           // bits 28-31
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstRet
{
    private readonly ulong _opcode;

    public InstRet(ulong opcode) => _opcode = opcode;

    public int Pred => (int)((_opcode >> 16) & 0x7);     // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;     // bit 19
    public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);      // bits 0-4
}
/// <summary>
/// Immutable bit-field view over one 64-bit instruction word.
/// Every property is a pure shift-and-mask of the stored word.
/// </summary>
readonly struct InstRroI
{
    private readonly ulong _opcode;

    public InstRroI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);        // bits 0-7

    // Opcode bit 56 becomes result bit 19; opcode bits 20-38 become result bits 0-18.
    // The value is not sign-extended here.
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);

    public int Pred => (int)((_opcode >> 16) & 0x7);        // bits 16-18
    public bool PredInv => (_opcode & 0x80000) != 0;        // bit 19
    public bool AbsB => (_opcode & 0x2000000000000) != 0;   // bit 49
    public bool NegB => (_opcode & 0x200000000000) != 0;    // bit 45
    public bool RroOp => (_opcode & 0x8000000000) != 0;     // bit 39
}
0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSelI
+    {
+        private readonly ulong _opcode;
+        public InstSelI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        // Bits 20-38 give the low 19 bits; opcode bit 56 is moved down to bit 19.
+        public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSelC
+    {
+        private readonly ulong _opcode;
+        public InstSelC(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSetcrsptr
+    {
+        private readonly ulong _opcode;
+        public InstSetcrsptr(ulong opcode) => _opcode = opcode;
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSetlmembase
+    {
+        private readonly ulong _opcode;
+        public InstSetlmembase(ulong opcode) => _opcode = opcode;
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShfLR
+    {
+        private readonly ulong _opcode;
+        public InstShfLR(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC
=> (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool M => (_opcode & 0x4000000000000) != 0;
+        public XModeShf XModeShf => (XModeShf)((_opcode >> 48) & 0x3);
+        public MaxShift MaxShift => (MaxShift)((_opcode >> 37) & 0x3);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShfRR
+    {
+        private readonly ulong _opcode;
+        public InstShfRR(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool M => (_opcode & 0x4000000000000) != 0;
+        public XModeShf XModeShf => (XModeShf)((_opcode >> 48) & 0x3);
+        public MaxShift MaxShift => (MaxShift)((_opcode >> 37) & 0x3);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShfLI
+    {
+        private readonly ulong _opcode;
+        public InstShfLI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool M => (_opcode & 0x4000000000000) != 0;
+        public XModeShf XModeShf => (XModeShf)((_opcode >> 48) & 0x3);
+        public MaxShift MaxShift => (MaxShift)((_opcode >> 37) & 0x3);
+        public int Imm6 => (int)((_opcode >> 20) & 0x3F);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShfRI
+    {
+        private readonly ulong _opcode;
+        public InstShfRI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) &
0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool M => (_opcode & 0x4000000000000) != 0;
+        public XModeShf XModeShf => (XModeShf)((_opcode >> 48) & 0x3);
+        public MaxShift MaxShift => (MaxShift)((_opcode >> 37) & 0x3);
+        public int Imm6 => (int)((_opcode >> 20) & 0x3F);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShfl
+    {
+        private readonly ulong _opcode;
+        public InstShfl(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        // SrcBImm starts at the same bit as SrcB; SrcCImm overlaps the SrcC bit range.
+        public int SrcBImm => (int)((_opcode >> 20) & 0x1F);
+        public int SrcCImm => (int)((_opcode >> 34) & 0x1FFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public ShflMode ShflMode => (ShflMode)((_opcode >> 30) & 0x3);
+        public bool CFixShfl => (_opcode & 0x20000000) != 0;
+        public bool BFixShfl => (_opcode & 0x10000000) != 0;
+        public int DestPred => (int)((_opcode >> 48) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShlR
+    {
+        private readonly ulong _opcode;
+        public InstShlR(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool X => (_opcode & 0x80000000000) != 0;
+        public bool M => (_opcode & 0x8000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShlI
+    {
+        private readonly ulong _opcode;
+        public InstShlI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        // Bits 20-38 give the low 19 bits; opcode bit 56 is moved down to bit 19.
+        public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv
=> (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool X => (_opcode & 0x80000000000) != 0;
+        public bool M => (_opcode & 0x8000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShlC
+    {
+        private readonly ulong _opcode;
+        public InstShlC(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool X => (_opcode & 0x80000000000) != 0;
+        public bool M => (_opcode & 0x8000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShrR
+    {
+        private readonly ulong _opcode;
+        public InstShrR(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Signed => (_opcode & 0x1000000000000) != 0;
+        public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
+        public bool Brev => (_opcode & 0x10000000000) != 0;
+        public bool M => (_opcode & 0x8000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShrI
+    {
+        private readonly ulong _opcode;
+        public InstShrI(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        // Bits 20-38 give the low 19 bits; opcode bit 56 is moved down to bit 19.
+        public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Signed => (_opcode & 0x1000000000000) != 0;
+        public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
+        public bool
Brev => (_opcode & 0x10000000000) != 0;
+        public bool M => (_opcode & 0x8000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstShrC
+    {
+        private readonly ulong _opcode;
+        public InstShrC(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Signed => (_opcode & 0x1000000000000) != 0;
+        public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
+        public bool Brev => (_opcode & 0x10000000000) != 0;
+        public bool M => (_opcode & 0x8000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSsy
+    {
+        private readonly ulong _opcode;
+        public InstSsy(ulong opcode) => _opcode = opcode;
+        public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+        public bool Ca => (_opcode & 0x20) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSt
+    {
+        private readonly ulong _opcode;
+        public InstSt(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int SrcPred => (int)((_opcode >> 58) & 0x7);
+        public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 56) & 0x3);
+        public LsSize LsSize => (LsSize)((_opcode >> 53) & 0x7);
+        public bool E => (_opcode & 0x10000000000000) != 0;
+        // No mask: the cast to int keeps the low 32 bits of the shifted value (bits 20-51).
+        public int Imm32 => (int)(_opcode >> 20);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstStg
+    {
+        private readonly ulong _opcode;
+        public InstStg(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+        public CacheOpSt CacheOp => (CacheOpSt)((_opcode
>> 46) & 0x3);
+        public bool E => (_opcode & 0x200000000000) != 0;
+        public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstStl
+    {
+        private readonly ulong _opcode;
+        public InstStl(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+        public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 44) & 0x3);
+        public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstStp
+    {
+        private readonly ulong _opcode;
+        public InstStp(ulong opcode) => _opcode = opcode;
+        public bool Wait => (_opcode & 0x80000000) != 0;
+        public int Imm8 => (int)((_opcode >> 20) & 0xFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSts
+    {
+        private readonly ulong _opcode;
+        public InstSts(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+        public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuatomB
+    {
+        private readonly ulong _opcode;
+        public InstSuatomB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public SuatomSize Size => (SuatomSize)((_opcode >> 36) & 0x7);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public SuatomOp Op => (SuatomOp)((_opcode >> 29) & 0xF);
+        public bool Ba => (_opcode & 0x10000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuatom
+    {
+        private readonly ulong
_opcode;
+        public InstSuatom(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public SuatomSize Size => (SuatomSize)((_opcode >> 51) & 0x7);
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public SuatomOp Op => (SuatomOp)((_opcode >> 29) & 0xF);
+        public bool Ba => (_opcode & 0x10000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuatomB2
+    {
+        private readonly ulong _opcode;
+        public InstSuatomB2(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public SuatomSize Size => (SuatomSize)((_opcode >> 36) & 0x7);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public SuatomOp Op => (SuatomOp)((_opcode >> 29) & 0xF);
+        public bool Ba => (_opcode & 0x10000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuatomCasB
+    {
+        private readonly ulong _opcode;
+        public InstSuatomCasB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public SuatomSize Size => (SuatomSize)((_opcode >> 36) & 0x7);
+        public SuDim Dim =>
(SuDim)((_opcode >> 33) & 0x7);
+        public int DestPred => (int)((_opcode >> 30) & 0x7);
+        public bool Ba => (_opcode & 0x10000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuatomCas
+    {
+        private readonly ulong _opcode;
+        public InstSuatomCas(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public SuatomSize Size => (SuatomSize)((_opcode >> 51) & 0x7);
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public int DestPred => (int)((_opcode >> 30) & 0x7);
+        public bool Ba => (_opcode & 0x10000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuldDB
+    {
+        private readonly ulong _opcode;
+        public InstSuldDB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public int DestPred2 => (int)((_opcode >> 30) & 0x7);
+        public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 24) & 0x3);
+        public bool Ba => (_opcode & 0x800000) != 0;
+        public SuSize Size => (SuSize)((_opcode >> 20) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuldD
+    {
+        private readonly ulong _opcode;
+        public InstSuldD(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public SuDim Dim =>
(SuDim)((_opcode >> 33) & 0x7);
+        public int DestPred2 => (int)((_opcode >> 30) & 0x7);
+        public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 24) & 0x3);
+        public bool Ba => (_opcode & 0x800000) != 0;
+        public SuSize Size => (SuSize)((_opcode >> 20) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuldB
+    {
+        private readonly ulong _opcode;
+        public InstSuldB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public int DestPred2 => (int)((_opcode >> 30) & 0x7);
+        public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 24) & 0x3);
+        public SuRgba Rgba => (SuRgba)((_opcode >> 20) & 0xF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuld
+    {
+        private readonly ulong _opcode;
+        public InstSuld(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public int DestPred2 => (int)((_opcode >> 30) & 0x7);
+        public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 24) & 0x3);
+        public SuRgba Rgba => (SuRgba)((_opcode >> 20) & 0xF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSuredB
+    {
+        private readonly ulong _opcode;
+        public InstSuredB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public
SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public RedOp Op => (RedOp)((_opcode >> 24) & 0x7);
+        public bool Ba => (_opcode & 0x800000) != 0;
+        public SuatomSize Size => (SuatomSize)((_opcode >> 20) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSured
+    {
+        private readonly ulong _opcode;
+        public InstSured(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public RedOp Op => (RedOp)((_opcode >> 24) & 0x7);
+        public bool Ba => (_opcode & 0x800000) != 0;
+        public SuatomSize Size => (SuatomSize)((_opcode >> 20) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSustDB
+    {
+        private readonly ulong _opcode;
+        public InstSustDB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 24) & 0x3);
+        public bool Ba => (_opcode & 0x800000) != 0;
+        public SuSize Size => (SuSize)((_opcode >> 20) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSustD
+    {
+        private readonly ulong _opcode;
+        public InstSustD(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public CacheOpSt CacheOp
=> (CacheOpSt)((_opcode >> 24) & 0x3);
+        public bool Ba => (_opcode & 0x800000) != 0;
+        public SuSize Size => (SuSize)((_opcode >> 20) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSustB
+    {
+        private readonly ulong _opcode;
+        public InstSustB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 24) & 0x3);
+        public SuRgba Rgba => (SuRgba)((_opcode >> 20) & 0xF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSust
+    {
+        private readonly ulong _opcode;
+        public InstSust(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+        public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 24) & 0x3);
+        public SuRgba Rgba => (SuRgba)((_opcode >> 20) & 0xF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstSync
+    {
+        private readonly ulong _opcode;
+        public InstSync(ulong opcode) => _opcode = opcode;
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTex
+    {
+        private readonly ulong _opcode;
+        public InstTex(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool Lc => (_opcode &
0x400000000000000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public Lod Lod => (Lod)((_opcode >> 55) & 0x7);
+        public bool Aoffi => (_opcode & 0x40000000000000) != 0;
+        public bool Dc => (_opcode & 0x4000000000000) != 0;
+        public bool Ndv => (_opcode & 0x800000000) != 0;
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTexB
+    {
+        private readonly ulong _opcode;
+        public InstTexB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool Lcb => (_opcode & 0x10000000000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public Lod Lodb => (Lod)((_opcode >> 37) & 0x7);
+        public bool Aoffib => (_opcode & 0x1000000000) != 0;
+        public bool Dc => (_opcode & 0x4000000000000) != 0;
+        public bool Ndv => (_opcode & 0x800000000) != 0;
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTexs
+    {
+        private readonly ulong _opcode;
+        public InstTexs(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public TexsTarget Target => (TexsTarget)((_opcode >> 53) & 0xF);
+        public int WMask => (int)((_opcode >> 50) & 0x7);
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public int Dest2 => (int)((_opcode >> 28) &
0xFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTld
+    {
+        private readonly ulong _opcode;
+        public InstTld(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public bool Lod => (_opcode & 0x80000000000000) != 0;
+        public bool Toff => (_opcode & 0x800000000) != 0;
+        public bool Ms => (_opcode & 0x4000000000000) != 0;
+        public bool Cl => (_opcode & 0x40000000000000) != 0;
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTldB
+    {
+        private readonly ulong _opcode;
+        public InstTldB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public bool Lod => (_opcode & 0x80000000000000) != 0;
+        public bool Toff => (_opcode & 0x800000000) != 0;
+        public bool Ms => (_opcode & 0x4000000000000) != 0;
+        public bool Cl => (_opcode & 0x40000000000000) != 0;
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTlds
+    {
+        private readonly ulong _opcode;
+        public InstTlds(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv =>
(_opcode & 0x80000) != 0;
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public TldsTarget Target => (TldsTarget)((_opcode >> 53) & 0xF);
+        public int WMask => (int)((_opcode >> 50) & 0x7);
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public int Dest2 => (int)((_opcode >> 28) & 0xFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTld4
+    {
+        private readonly ulong _opcode;
+        public InstTld4(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool Lc => (_opcode & 0x400000000000000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public TexComp TexComp => (TexComp)((_opcode >> 56) & 0x3);
+        public TexOffset Toff => (TexOffset)((_opcode >> 54) & 0x3);
+        public bool Dc => (_opcode & 0x4000000000000) != 0;
+        public bool Ndv => (_opcode & 0x800000000) != 0;
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTld4B
+    {
+        private readonly ulong _opcode;
+        public InstTld4B(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool Lc => (_opcode & 0x10000000000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public TexComp TexComp => (TexComp)((_opcode >> 38) & 0x3);
+        public TexOffset Toff => (TexOffset)((_opcode >> 36) & 0x3);
+        public bool Dc => (_opcode & 0x4000000000000) != 0;
+        public bool Ndv => (_opcode & 0x800000000) != 0;
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+        public
int WMask => (int)((_opcode >> 31) & 0xF);
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTld4s
+    {
+        private readonly ulong _opcode;
+        public InstTld4s(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public TexComp TexComp => (TexComp)((_opcode >> 52) & 0x3);
+        public bool Aoffi => (_opcode & 0x8000000000000) != 0;
+        public bool Dc => (_opcode & 0x4000000000000) != 0;
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public int Dest2 => (int)((_opcode >> 28) & 0xFF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTmml
+    {
+        private readonly ulong _opcode;
+        public InstTmml(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public bool Ndv => (_opcode & 0x800000000) != 0;
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTmmlB
+    {
+        private readonly ulong _opcode;
+        public InstTmmlB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public bool Ndv => (_opcode & 0x800000000) != 0;
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public TexDim Dim => (TexDim)((_opcode
>> 28) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTxa
+    {
+        private readonly ulong _opcode;
+        public InstTxa(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public bool Ndv => (_opcode & 0x800000000) != 0;
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTxd
+    {
+        private readonly ulong _opcode;
+        public InstTxd(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public bool Lc => (_opcode & 0x4000000000000) != 0;
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+        public bool Toff => (_opcode & 0x800000000) != 0;
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+    }
+
+    /// <summary>Read-only bit-field view over a 64-bit opcode.</summary>
+    readonly struct InstTxdB
+    {
+        private readonly ulong _opcode;
+        public InstTxdB(ulong opcode) => _opcode = opcode;
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+        public int DestPred => (int)((_opcode >> 51) & 0x7);
+        public bool Lc => (_opcode & 0x4000000000000) != 0;
+        public bool Nodep => (_opcode & 0x2000000000000) != 0;
+        public bool Toff => (_opcode & 0x800000000) != 0;
+        public int WMask => (int)((_opcode >> 31) & 0xF);
+        public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+    }
+
+    struct InstTxq
+    {
+
private ulong _opcode; + public InstTxq(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool Nodep => (_opcode & 0x2000000000000) != 0; + public int TidB => (int)((_opcode >> 36) & 0x1FFF); + public int WMask => (int)((_opcode >> 31) & 0xF); + public TexQuery TexQuery => (TexQuery)((_opcode >> 22) & 0x3F); + } + + struct InstTxqB + { + private ulong _opcode; + public InstTxqB(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool Nodep => (_opcode & 0x2000000000000) != 0; + public int WMask => (int)((_opcode >> 31) & 0xF); + public TexQuery TexQuery => (TexQuery)((_opcode >> 22) & 0x3F); + } + + struct InstVabsdiff + { + private ulong _opcode; + public InstVabsdiff(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool DFormat => (_opcode & 0x40000000000000) != 0; + public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); + public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); + public bool Sat => (_opcode & 0x80000000000000) != 0; + public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7); + public bool BVideo => (_opcode & 0x4000000000000) != 0; + } + + struct InstVabsdiff4 + { + private ulong _opcode; + public 
InstVabsdiff4(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public VideoRed VRed => (VideoRed)((_opcode >> 53) & 0x3); + public LaneMask4 LaneMask4 => (LaneMask4)((int)((_opcode >> 49) & 0xC) | (int)((_opcode >> 36) & 0x3)); + public bool Sat => (_opcode & 0x4000000000000) != 0; + public bool SrcBFmt => (_opcode & 0x2000000000000) != 0; + public bool SrcAFmt => (_opcode & 0x1000000000000) != 0; + public bool DFormat => (_opcode & 0x4000000000) != 0; + public ASelect4 Asel4 => (ASelect4)((_opcode >> 32) & 0xF); + public BSelect4 Bsel4 => (BSelect4)((_opcode >> 28) & 0xF); + } + + struct InstVadd + { + private ulong _opcode; + public InstVadd(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 56) & 0x3); + public bool DFormat => (_opcode & 0x40000000000000) != 0; + public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); + public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); + public bool Sat => (_opcode & 0x80000000000000) != 0; + public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7); + public bool BVideo => (_opcode & 0x4000000000000) != 0; + } + + struct 
InstVmad + { + private ulong _opcode; + public InstVmad(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); + public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); + public bool Sat => (_opcode & 0x80000000000000) != 0; + public AvgMode AvgMode => (AvgMode)((_opcode >> 53) & 0x3); + public VideoScale VideoScale => (VideoScale)((_opcode >> 51) & 0x3); + public bool BVideo => (_opcode & 0x4000000000000) != 0; + } + + struct InstVmnmx + { + private ulong _opcode; + public InstVmnmx(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool DFormat => (_opcode & 0x40000000000000) != 0; + public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); + public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); + public bool Sat => (_opcode & 0x80000000000000) != 0; + public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7); + public bool Mn => (_opcode & 0x100000000000000) != 0; + public bool BVideo => (_opcode & 
0x4000000000000) != 0; + } + + struct InstVote + { + private ulong _opcode; + public InstVote(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public VoteMode VoteMode => (VoteMode)((_opcode >> 48) & 0x3); + public int VpDest => (int)((_opcode >> 45) & 0x7); + } + + struct InstVotevtg + { + private ulong _opcode; + public InstVotevtg(ulong opcode) => _opcode = opcode; + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public VoteMode VoteMode => (VoteMode)((_opcode >> 48) & 0x3); + public int Imm28 => (int)((_opcode >> 20) & 0xFFFFFFF); + } + + struct InstVset + { + private ulong _opcode; + public InstVset(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public IComp VComp => (IComp)((_opcode >> 54) & 0x7); + public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); + public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); + public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7); + public bool BVideo => (_opcode & 0x4000000000000) != 0; + } + + struct InstVsetp + { + private ulong _opcode; + public InstVsetp(ulong opcode) => _opcode = opcode; + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool 
PredInv => (_opcode & 0x80000) != 0; + public int Imm16 => (int)((_opcode >> 20) & 0xFFFF); + public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); + public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7)); + public IComp VComp => (IComp)((int)((_opcode >> 45) & 0x4) | (int)((_opcode >> 43) & 0x3)); + public BoolOp BoolOp => (BoolOp)((_opcode >> 45) & 0x3); + public int SrcPred => (int)((_opcode >> 39) & 0x7); + public bool SrcPredInv => (_opcode & 0x40000000000) != 0; + public int DestPred => (int)((_opcode >> 3) & 0x7); + public int DestPredInv => (int)((_opcode >> 0) & 0x7); + public bool BVideo => (_opcode & 0x4000000000000) != 0; + } + + struct InstVshl + { + private ulong _opcode; + public InstVshl(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Mv => (_opcode & 0x2000000000000) != 0; + public bool DFormat => (_opcode & 0x40000000000000) != 0; + public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); + public VectorSelect BSelect => (VectorSelect)((_opcode >> 28) & 0x7); + public bool Sat => (_opcode & 0x80000000000000) != 0; + public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7); + public bool BVideo => (_opcode & 0x4000000000000) != 0; + } + + struct InstVshr + { + private ulong _opcode; + public InstVshr(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 
0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Mv => (_opcode & 0x2000000000000) != 0; + public bool DFormat => (_opcode & 0x40000000000000) != 0; + public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7)); + public VectorSelect BSelect => (VectorSelect)((_opcode >> 28) & 0x7); + public bool Sat => (_opcode & 0x80000000000000) != 0; + public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7); + public bool BVideo => (_opcode & 0x4000000000000) != 0; + } + + struct InstXmadR + { + private ulong _opcode; + public InstXmadR(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcB => (int)((_opcode >> 20) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool HiloA => (_opcode & 0x20000000000000) != 0; + public XmadCop XmadCop => (XmadCop)((_opcode >> 50) & 0x7); + public bool BSigned => (_opcode & 0x2000000000000) != 0; + public bool ASigned => (_opcode & 0x1000000000000) != 0; + public bool X => (_opcode & 0x4000000000) != 0; + public bool Mrg => (_opcode & 0x2000000000) != 0; + public bool Psl => (_opcode & 0x1000000000) != 0; + public bool HiloB => (_opcode & 0x800000000) != 0; + } + + struct InstXmadI + { + private ulong _opcode; + public InstXmadI(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public int Imm16 
=> (int)((_opcode >> 20) & 0xFFFF); + public bool HiloA => (_opcode & 0x20000000000000) != 0; + public XmadCop XmadCop => (XmadCop)((_opcode >> 50) & 0x7); + public bool BSigned => (_opcode & 0x2000000000000) != 0; + public bool ASigned => (_opcode & 0x1000000000000) != 0; + public bool X => (_opcode & 0x4000000000) != 0; + public bool Mrg => (_opcode & 0x2000000000) != 0; + public bool Psl => (_opcode & 0x1000000000) != 0; + } + + struct InstXmadC + { + private ulong _opcode; + public InstXmadC(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool Mrg => (_opcode & 0x100000000000000) != 0; + public bool Psl => (_opcode & 0x80000000000000) != 0; + public bool X => (_opcode & 0x40000000000000) != 0; + public bool HiloA => (_opcode & 0x20000000000000) != 0; + public bool HiloB => (_opcode & 0x10000000000000) != 0; + public XmadCop2 XmadCop => (XmadCop2)((_opcode >> 50) & 0x3); + public bool BSigned => (_opcode & 0x2000000000000) != 0; + public bool ASigned => (_opcode & 0x1000000000000) != 0; + } + + struct InstXmadRc + { + private ulong _opcode; + public InstXmadRc(ulong opcode) => _opcode = opcode; + public int Dest => (int)((_opcode >> 0) & 0xFF); + public int SrcA => (int)((_opcode >> 8) & 0xFF); + public int SrcC => (int)((_opcode >> 39) & 0xFF); + public int CbufSlot => (int)((_opcode >> 34) & 0x1F); + public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF); + public int Pred => (int)((_opcode >> 16) & 0x7); + public bool PredInv => (_opcode & 0x80000) != 0; + public bool WriteCC => (_opcode & 0x800000000000) != 0; + public bool X => (_opcode 
& 0x40000000000000) != 0; + public bool HiloA => (_opcode & 0x20000000000000) != 0; + public bool HiloB => (_opcode & 0x10000000000000) != 0; + public XmadCop2 XmadCop => (XmadCop2)((_opcode >> 50) & 0x3); + public bool BSigned => (_opcode & 0x2000000000000) != 0; + public bool ASigned => (_opcode & 0x1000000000000) != 0; + } +}
// File: src/Ryujinx.Graphics.Shader/Decoders/InstName.cs
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Names of the shader instructions known to the decoder.
    /// </summary>
    /// <remarks>
    /// The underlying type is <see cref="byte"/>. <see cref="Invalid"/> is pinned to 0, so a
    /// default-initialized value is <see cref="Invalid"/>; every other member takes the next
    /// consecutive value in declaration order. Do not reorder members without checking for
    /// code that relies on the numeric values.
    /// </remarks>
    enum InstName : byte
    {
        Invalid = 0,

        Al2p,
        Ald,
        Ast,
        Atom,
        AtomCas,
        Atoms,
        AtomsCas,
        B2r,
        Bar,
        Bfe,
        Bfi,
        Bpt,
        Bra,
        Brk,
        Brx,
        Cal,
        Cctl,
        Cctll,
        Cctlt,
        Cont,
        Cset,
        Csetp,
        Cs2r,
        Dadd,
        Depbar,
        Dfma,
        Dmnmx,
        Dmul,
        Dset,
        Dsetp,
        Exit,
        F2f,
        F2i,
        Fadd,
        Fadd32i,
        Fchk,
        Fcmp,
        Ffma,
        Ffma32i,
        Flo,
        Fmnmx,
        Fmul,
        Fmul32i,
        Fset,
        Fsetp,
        Fswzadd,
        Getcrsptr,
        Getlmembase,
        Hadd2,
        Hadd232i,
        Hfma2,
        Hmul2,
        Hmul232i,
        Hset2,
        Hsetp2,
        I2f,
        I2i,
        Iadd,
        Iadd32i,
        Iadd3,
        Icmp,
        Ide,
        Idp,
        Imad,
        Imad32i,
        Imadsp,
        Imnmx,
        Imul,
        Imul32i,
        Ipa,
        Isberd,
        Iscadd,
        Iscadd32i,
        Iset,
        Isetp,
        Jcal,
        Jmp,
        Jmx,
        Kil,
        Ld,
        Ldc,
        Ldg,
        Ldl,
        Lds,
        Lea,
        LeaHi,
        Lepc,
        Longjmp,
        Lop,
        Lop3,
        Lop32i,
        Membar,
        Mov,
        Mov32i,
        Mufu,
        Nop,
        Out,
        P2r,
        Pbk,
        Pcnt,
        Pexit,
        Pixld,
        Plongjmp,
        Popc,
        Pret,
        Prmt,
        Pset,
        Psetp,
        R2b,
        R2p,
        Ram,
        Red,
        Ret,
        Rro,
        Rtt,
        S2r,
        Sam,
        Sel,
        Setcrsptr,
        Setlmembase,
        Shf,
        Shf_2,
        Shf_3,
        Shf_4,
        Shfl,
        Shl,
        Shr,
        Ssy,
        St,
        Stg,
        Stl,
        Stp,
        Sts,
        SuatomB,
        Suatom,
        SuatomB2,
        SuatomCasB,
        SuatomCas,
        SuldDB,
        SuldD,
        SuldB,
        Suld,
        SuredB,
        Sured,
        SustDB,
        SustD,
        SustB,
        Sust,
        Sync,
        Tex,
        TexB,
        Texs,
        TexsF16,
        Tld,
        TldB,
        Tlds,
        TldsF16,
        Tld4,
        Tld4B,
        Tld4s,
        Tld4sF16,
        Tmml,
        TmmlB,
        Txa,
        Txd,
        TxdB,
        Txq,
        TxqB,
        Vabsdiff,
        Vabsdiff4,
        Vadd,
        Vmad,
        Vmnmx,
        Vote,
        Votevtg,
        Vset,
        Vsetp,
        Vshl,
        Vshr,
        Xmad
    }
}
// File: src/Ryujinx.Graphics.Shader/Decoders/InstOp.cs
using Ryujinx.Graphics.Shader.Instructions;

namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Immutable description of one decoded instruction: where it is, its raw 64-bit encoding,
    /// and the name, emitter and property flags associated with it.
    /// </summary>
    readonly struct InstOp
    {
        public readonly ulong Address;
        public readonly ulong RawOpCode;
        public readonly InstEmitter Emitter;
        public readonly InstProps Props;
        public readonly InstName Name;

        /// <summary>
        /// Stores the supplied values unchanged; no validation is performed.
        /// </summary>
        /// <param name="address">Address of the instruction.</param>
        /// <param name="rawOpCode">Raw 64-bit instruction word.</param>
        /// <param name="name">Instruction name.</param>
        /// <param name="emitter">Emitter associated with the instruction.</param>
        /// <param name="props">Property flags of the instruction.</param>
        public InstOp(ulong address, ulong rawOpCode, InstName name, InstEmitter emitter, InstProps props)
        {
            (Address, RawOpCode, Name, Emitter, Props) = (address, rawOpCode, name, emitter, props);
        }

        /// <summary>
        /// Computes <see cref="Address"/> plus the signed 24-bit offset stored in bits 20..43 of
        /// <see cref="RawOpCode"/>, plus 8.
        /// </summary>
        /// <returns>The resulting address as an unsigned 64-bit value.</returns>
        public ulong GetAbsoluteAddress()
        {
            // Truncating to int keeps opcode bits 20..51; shifting left by 8 discards everything
            // above the 24-bit field and the arithmetic right shift by 8 sign-extends it.
            int signedOffset = ((int)(RawOpCode >> 20) << 8) >> 8;

            // The constant 8 is presumably the size in bytes of one instruction word, making the
            // offset relative to the following instruction — confirm against the decoder.
            long target = (long)Address + signedOffset + 8;

            return (ulong)target;
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Decoders/InstProps.cs
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Per-instruction property bits. Members are OR-ed together when instructions are
    /// registered in the instruction table.
    /// </summary>
    /// <remarks>
    /// Most members occupy a single bit. The exception is the group <see cref="Pd"/> ..
    /// <see cref="VPd"/>: those are the values 1 to 5 stored in the 3-bit field at bits 7..9
    /// (selected by <see cref="PdMask"/>), so they must be compared after masking rather than
    /// tested as independent flags.
    /// </remarks>
    enum InstProps : ushort
    {
        None = 0x0000,

        // Single-bit flags, bits 0..6.
        Rd = 0x0001,
        Rd2 = 0x0002,
        Ra = 0x0004,
        Rb = 0x0008,
        Rb2 = 0x0010,
        Ib = 0x0020,
        Rc = 0x0040,

        // 3-bit enumerated field at bits 7..9 (value << 7).
        Pd = 0x0080,     // 1 << 7
        LPd = 0x0100,    // 2 << 7
        SPd = 0x0180,    // 3 << 7
        TPd = 0x0200,    // 4 << 7
        VPd = 0x0280,    // 5 << 7
        PdMask = 0x0380, // 7 << 7

        // Single-bit flags, bits 10..15.
        Pdn = 0x0400,
        Ps = 0x0800,
        Tex = 0x1000,
        TexB = 0x2000,
        Bra = 0x4000,
        NoPred = 0x8000
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Decoders/InstTable.cs b/src/Ryujinx.Graphics.Shader/Decoders/InstTable.cs new file mode 100644 index 00000000..eaa77930 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Decoders/InstTable.cs @@ -0,0 +1,390 @@ +using Ryujinx.Graphics.Shader.Instructions; +using System; + +namespace Ryujinx.Graphics.Shader.Decoders +{ + static class InstTable + { + private const int EncodingBits = 14; + + private readonly struct TableEntry + { + public InstName Name { get; } + public InstEmitter Emitter { get; } + public InstProps Props { get; } + + public int XBits { get; } + + public TableEntry(InstName name, InstEmitter emitter, InstProps props, int xBits) + { + Name = name; + Emitter = emitter; + Props = props; + XBits = xBits; + } + } + + private static TableEntry[] _opCodes; + + static InstTable() + { + _opCodes = new TableEntry[1 << EncodingBits]; + + #region Instructions + Add("1110111110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Al2p, InstEmit.Al2p, InstProps.Rd | InstProps.Ra); + Add("1110111111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ald, InstEmit.Ald, InstProps.Rd | InstProps.Ra); + Add("1110111111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ast, InstEmit.Ast, InstProps.Ra | InstProps.Rb2 | InstProps.Rc); + Add("11101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Atom, InstEmit.Atom, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("111011101111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.AtomCas, InstEmit.AtomCas, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("11101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Atoms, InstEmit.Atoms, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("111011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.AtomsCas, InstEmit.AtomsCas, InstProps.Rd | InstProps.Ra | InstProps.Rb); + 
Add("1111000010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B2r, InstEmit.B2r, InstProps.Rd | InstProps.Ra | InstProps.VPd); + Add("1111000010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bar, InstEmit.Bar, InstProps.Ra | InstProps.Ps); + Add("0101110000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x00000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeC, InstProps.Rd | InstProps.Ra); + Add("0101101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011011x11110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("0100101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiC, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("0101001111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("111000111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bpt, InstEmit.Bpt, InstProps.NoPred); + Add("111000100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bra, InstEmit.Bra, InstProps.Bra); + Add("111000110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Brk, InstEmit.Brk, InstProps.Bra); + Add("111000100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Brx, InstEmit.Brx, InstProps.Ra | InstProps.Bra); + Add("111000100110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cal, InstEmit.Cal, InstProps.Bra | InstProps.NoPred); + 
Add("11101111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctl, InstEmit.Cctl, InstProps.Ra); + Add("1110111110000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctll, InstEmit.Cctll, InstProps.Ra); + Add("1110101111110xx0000000000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt, InstEmit.Cctlt); + Add("1110101111101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt, InstEmit.Cctlt, InstProps.Rc); + Add("111000110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cont, InstEmit.Cont, InstProps.Bra); + Add("0101000010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cset, InstEmit.Cset, InstProps.Rd | InstProps.Ps); + Add("0101000010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Csetp, InstEmit.Csetp, InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("0101000011001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cs2r, InstEmit.Cs2r, InstProps.Rd); + Add("0101110001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x01110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddC, InstProps.Rd | InstProps.Ra); + Add("1111000011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Depbar, InstEmit.Depbar); + Add("010110110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011011x0111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("010010110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaC, InstProps.Rd | InstProps.Ra | 
InstProps.Rc); + Add("010100110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("0101110001010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps); + Add("0011100x01010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps); + Add("0100110001010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps); + Add("0101110010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x10000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulC, InstProps.Rd | InstProps.Ra); + Add("010110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps); + Add("0011001x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps); + Add("010010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps); + Add("010110111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("0011011x1000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + 
Add("010010111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("111000110000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Exit, InstEmit.Exit, InstProps.Bra); + Add("0101110010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fR, InstProps.Rd | InstProps.Rb); + Add("0011100x10101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fI, InstProps.Rd | InstProps.Ib); + Add("0100110010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fC, InstProps.Rd); + Add("0101110010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iR, InstProps.Rd | InstProps.Rb); + Add("0011100x10110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iI, InstProps.Rd | InstProps.Ib); + Add("0100110010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iC, InstProps.Rd); + Add("0101110001011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x01011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110001011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddC, InstProps.Rd | InstProps.Ra); + Add("000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd32i, InstEmit.Fadd32i, InstProps.Rd | InstProps.Ra); + Add("0101110010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkR, InstProps.Ra | InstProps.Rb | InstProps.Pd); + Add("0011100x10001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkI, InstProps.Ra | InstProps.Ib | InstProps.Pd); + 
Add("0100110010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkC, InstProps.Ra | InstProps.Pd); + Add("010110111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011011x1010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("010010111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpC, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010100111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011001x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("010010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaC, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma32i, InstEmit.Ffma32i, InstProps.Rd | InstProps.Ra); + Add("0101110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloR, InstProps.Rd | InstProps.Rb); + Add("0011100x00110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloI, InstProps.Rd | InstProps.Ib); + Add("0100110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloC, InstProps.Rd); + Add("0101110001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", 
InstName.Fmnmx, InstEmit.FmnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps); + Add("0011100x01100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmnmx, InstEmit.FmnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps); + Add("0100110001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmnmx, InstEmit.FmnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps); + Add("0101110001101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x01101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110001101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulC, InstProps.Rd | InstProps.Ra); + Add("00011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul32i, InstEmit.Fmul32i, InstProps.Rd | InstProps.Ra); + Add("01011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps); + Add("0011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps); + Add("01001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps); + Add("010110111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("0011011x1011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("010010111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | 
InstProps.Ps); + Add("0101000011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fswzadd, InstEmit.Fswzadd, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("111000101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Getcrsptr, InstEmit.Getcrsptr, InstProps.Rd | InstProps.NoPred); + Add("111000101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Getlmembase, InstEmit.Getlmembase, InstProps.Rd | InstProps.NoPred); + Add("0101110100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2R, InstProps.Rd | InstProps.Ra); + Add("0111101x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2I, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0111101x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2C, InstProps.Rd | InstProps.Ra); + Add("0010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd232i, InstEmit.Hadd232i, InstProps.Rd | InstProps.Ra); + Add("0101110100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("01110xxx0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("01110xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2C, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("01100xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2Rc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("0010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma232i, InstProps.Rd | InstProps.Ra); + Add("0101110100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2R, InstProps.Rd | InstProps.Ra | InstProps.Rb); + 
Add("0111100x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2I, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0111100x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2C, InstProps.Rd | InstProps.Ra); + Add("0010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul232i, InstEmit.Hmul232i, InstProps.Rd | InstProps.Ra); + Add("0101110100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps); + Add("0111110x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps); + Add("0111110x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2C, InstProps.Rd | InstProps.Ra | InstProps.Ps); + Add("0101110100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2R, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("0111111x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2I, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("0111111x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2C, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("0101110010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fR, InstProps.Rd | InstProps.Rb); + Add("0011100x10111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fI, InstProps.Rd | InstProps.Ib); + Add("0100110010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fC, InstProps.Rd); + Add("0101110011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iR, InstProps.Rd | InstProps.Rb); + 
Add("0011100x11100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iI, InstProps.Rd | InstProps.Ib); + Add("0100110011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iC, InstProps.Rd); + Add("0101110000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x00010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddC, InstProps.Rd | InstProps.Ra); + Add("0001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd32i, InstEmit.Iadd32i, InstProps.Rd | InstProps.Ra); + Add("010111001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011100x1100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("010011001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3C, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010110110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011011x0100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("010010110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpC, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010100110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + 
Add("111000111001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ide, InstEmit.Ide, InstProps.NoPred); + Add("0101001111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Idp, InstEmit.IdpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0101001111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Idp, InstEmit.IdpC, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011010x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("010010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadC, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad32i, InstEmit.Imad32i, InstProps.Rd | InstProps.Ra); + Add("010110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011010x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("010010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspC, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("0101110000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | 
InstProps.Ps); + Add("0011100x00100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps); + Add("0100110000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps); + Add("0101110000111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x00111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110000111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulC, InstProps.Rd | InstProps.Ra); + Add("00011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul32i, InstEmit.Imul32i, InstProps.Rd | InstProps.Ra); + Add("11100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ipa, InstEmit.Ipa, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("1110111111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isberd, InstEmit.Isberd, InstProps.Rd | InstProps.Ra); + Add("0101110000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x00011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddC, InstProps.Rd | InstProps.Ra); + Add("000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd32i, InstEmit.Iscadd32i, InstProps.Rd | InstProps.Ra); + Add("010110110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps); + 
Add("0011011x0101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps); + Add("010010110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps); + Add("010110110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("0011011x0110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("010010110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps); + Add("111000100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jcal, InstEmit.Jcal, InstProps.Bra); + Add("111000100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jmp, InstEmit.Jmp, InstProps.Ra | InstProps.Bra); + Add("111000100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jmx, InstEmit.Jmx, InstProps.Ra | InstProps.Bra); + Add("111000110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Kil, InstEmit.Kil, InstProps.Bra); + Add("100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ld, InstEmit.Ld, InstProps.Rd | InstProps.Ra); + Add("1110111110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldc, InstEmit.Ldc, InstProps.Rd | InstProps.Ra); + Add("1110111011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldg, InstEmit.Ldg, InstProps.Rd | InstProps.Ra); + Add("1110111101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldl, InstEmit.Ldl, InstProps.Rd | InstProps.Ra); + Add("1110111101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lds, InstEmit.Lds, InstProps.Rd | 
InstProps.Ra); + Add("0101101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.LPd); + Add("0011011x11010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.LPd); + Add("0100101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaC, InstProps.Rd | InstProps.Ra | InstProps.LPd); + Add("0101101111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.LeaHi, InstEmit.LeaHiR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd); + Add("000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.LeaHi, InstEmit.LeaHiC, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.LPd); + Add("0101000011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lepc, InstEmit.Lepc); + Add("111000110001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Longjmp, InstEmit.Longjmp, InstProps.Bra); + Add("0101110001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.LPd); + Add("0011100x01000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.LPd); + Add("0100110001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopC, InstProps.Rd | InstProps.Ra | InstProps.LPd); + Add("0101101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, InstEmit.Lop3R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd); + Add("001111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, InstEmit.Lop3I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("0000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, 
InstEmit.Lop3C, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop32i, InstEmit.Lop32i, InstProps.Rd | InstProps.Ra); + Add("1110111110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Membar, InstEmit.Membar); + Add("0101110010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovR, InstProps.Rd | InstProps.Ra); + Add("0011100x10011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovI, InstProps.Rd | InstProps.Ib); + Add("0100110010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovC, InstProps.Rd); + Add("000000010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov32i, InstEmit.Mov32i, InstProps.Rd); + Add("0101000010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mufu, InstEmit.Mufu, InstProps.Rd | InstProps.Ra); + Add("0101000010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Nop, InstEmit.Nop); + Add("1111101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("1111011x11100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("1110101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutC, InstProps.Rd | InstProps.Ra); + Add("0101110011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x11101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rC, InstProps.Rd | InstProps.Ra); + Add("111000101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pbk, 
InstEmit.Pbk, InstProps.NoPred); + Add("111000101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pcnt, InstEmit.Pcnt, InstProps.NoPred); + Add("111000100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pexit, InstEmit.Pexit); + Add("1110111111101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pixld, InstEmit.Pixld, InstProps.Rd | InstProps.Ra | InstProps.VPd); + Add("111000101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Plongjmp, InstEmit.Plongjmp, InstProps.Bra | InstProps.NoPred); + Add("0101110000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcR, InstProps.Rd | InstProps.Rb); + Add("0011100x00001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcI, InstProps.Rd | InstProps.Ib); + Add("0100110000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcC, InstProps.Rd); + Add("111000100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pret, InstEmit.Pret, InstProps.NoPred); + Add("010110111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011011x1100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("010010111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtC, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("010100111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtRc, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("0101000010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pset, InstEmit.Pset, InstProps.Rd | InstProps.Ps); + Add("0101000010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Psetp, InstEmit.Psetp, InstProps.Pd | InstProps.Pdn | InstProps.Ps); + 
Add("1111000011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2b, InstEmit.R2b, InstProps.Rb); + Add("0101110011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pR, InstProps.Ra | InstProps.Rb); + Add("0011100x11110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pI, InstProps.Ra | InstProps.Ib); + Add("0100110011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pC, InstProps.Ra); + Add("111000111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ram, InstEmit.Ram, InstProps.NoPred); + Add("1110101111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Red, InstEmit.Red, InstProps.Ra | InstProps.Rb2); + Add("111000110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ret, InstEmit.Ret, InstProps.Bra); + Add("0101110010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroR, InstProps.Rd | InstProps.Rb); + Add("0011100x10010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroI, InstProps.Rd | InstProps.Ib); + Add("0100110010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroC, InstProps.Rd); + Add("111000110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rtt, InstEmit.Rtt, InstProps.NoPred); + Add("1111000011001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.S2r, InstEmit.S2r, InstProps.Rd); + Add("111000110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sam, InstEmit.Sam, InstProps.NoPred); + Add("0101110010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps); + Add("0011100x10100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps); + 
Add("0100110010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelC, InstProps.Rd | InstProps.Ra | InstProps.Ps); + Add("111000101110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Setcrsptr, InstEmit.Setcrsptr, InstProps.Ra | InstProps.NoPred); + Add("111000101111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Setlmembase, InstEmit.Setlmembase, InstProps.Ra | InstProps.NoPred); + Add("0101101111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfLR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0101110011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfRR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011011x11111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfLI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("0011100x11111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfRI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc); + Add("1110111100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shfl, InstEmit.Shfl, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd); + Add("0101110001001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x01001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlI, InstProps.Rd | InstProps.Ra | InstProps.Ib); + Add("0100110001001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlC, InstProps.Rd | InstProps.Ra); + Add("0101110000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrR, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("0011100x00101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrI, InstProps.Rd | InstProps.Ra | 
InstProps.Ib); + Add("0100110000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrC, InstProps.Rd | InstProps.Ra); + Add("111000101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ssy, InstEmit.Ssy, InstProps.NoPred); + Add("101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.St, InstEmit.St, InstProps.Rd | InstProps.Ra); + Add("1110111011011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stg, InstEmit.Stg, InstProps.Rd | InstProps.Ra); + Add("1110111101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stl, InstEmit.Stl, InstProps.Rd | InstProps.Ra); + Add("1110111010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, InstProps.NoPred); + Add("1110111101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sts, InstEmit.Sts, InstProps.Rd | InstProps.Ra); + Add("1110101001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomB, InstEmit.SuatomB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("11101010x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Suatom, InstEmit.Suatom, InstProps.Rd | InstProps.Ra | InstProps.Rb); + Add("1110101110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomB2, InstEmit.SuatomB2, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("1110101011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomCasB, InstEmit.SuatomCasB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.SPd); + Add("1110101x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomCas, InstEmit.SuatomCas, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.SPd); + Add("1110101100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldDB, InstEmit.SuldDB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.SPd | InstProps.TexB); + 
Add("1110101100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldD, InstEmit.SuldD, InstProps.Rd | InstProps.Ra | InstProps.SPd | InstProps.Tex); + Add("1110101100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldB, InstEmit.SuldB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.SPd | InstProps.TexB); + Add("1110101100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Suld, InstEmit.Suld, InstProps.Rd | InstProps.Ra | InstProps.SPd | InstProps.Tex); + Add("1110101101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuredB, InstEmit.SuredB, InstProps.Rd | InstProps.Ra | InstProps.Rc); + Add("1110101101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sured, InstEmit.Sured, InstProps.Rd | InstProps.Ra); + Add("1110101100110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustDB, InstEmit.SustDB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.TexB); + Add("1110101100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustD, InstEmit.SustD, InstProps.Rd | InstProps.Ra | InstProps.Tex); + Add("1110101100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustB, InstEmit.SustB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.TexB); + Add("1110101100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sust, InstEmit.Sust, InstProps.Rd | InstProps.Ra | InstProps.Tex); + Add("1111000011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sync, InstEmit.Sync, InstProps.Bra); + Add("11000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tex, InstEmit.Tex, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex); + Add("1101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TexB, InstEmit.TexB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB); + Add("1101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", 
InstName.Texs, InstEmit.Texs, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex); + Add("1101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TexsF16, InstEmit.TexsF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex); + Add("11011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld, InstEmit.Tld, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex); + Add("11011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TldB, InstEmit.TldB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB); + Add("1101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tlds, InstEmit.Tlds, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex); + Add("1101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TldsF16, InstEmit.TldsF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex); + Add("110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4, InstEmit.Tld4, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex); + Add("1101111011xxxxxxxxxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4B, InstEmit.Tld4B, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB); + Add("1101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4s, InstEmit.Tld4s, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex); + Add("1101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4sF16, InstEmit.Tld4sF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex); + Add("1101111101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tmml, InstEmit.Tmml, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Tex); + Add("1101111101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", 
InstName.TmmlB, InstEmit.TmmlB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TexB); + Add("1101111101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txa, InstEmit.Txa, InstProps.Rd | InstProps.Ra | InstProps.Tex); + Add("110111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txd, InstEmit.Txd, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex); + Add("1101111001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TxdB, InstEmit.TxdB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB); + Add("1101111101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txq, InstEmit.Txq, InstProps.Rd | InstProps.Ra | InstProps.Tex); + Add("1101111101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TxqB, InstEmit.TxqB, InstProps.Rd | InstProps.Ra | InstProps.TexB); + Add("01010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vabsdiff, InstEmit.Vabsdiff, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("010100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vabsdiff4, InstEmit.Vabsdiff4, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vadd, InstEmit.Vadd, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("01011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vmad, InstEmit.Vmad, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vmnmx, InstEmit.Vmnmx, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc); + Add("0101000011011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vote, InstEmit.Vote, InstProps.Rd | InstProps.VPd | InstProps.Ps); + Add("0101000011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Votevtg, 
InstEmit.Votevtg);
            Add("0100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vset, InstEmit.Vset, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
            Add("0101000011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vsetp, InstEmit.Vsetp, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
            Add("01010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vshl, InstEmit.Vshl, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
            Add("01010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vshr, InstEmit.Vshr, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
            Add("0101101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
            Add("0011011x00xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
            Add("0100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
            Add("010100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
            #endregion
        }

        /// <summary>
        /// Registers an instruction encoding pattern in the opcode lookup table.
        /// </summary>
        /// <remarks>
        /// Only the first <c>EncodingBits</c> characters of <paramref name="encoding"/> are examined.
        /// A '1' is a fixed set bit, an 'x' is a wildcard bit, and any other character is a fixed clear bit.
        /// Every table slot reachable by substituting the wildcard bits is visited; a slot is written only
        /// when it has no emitter yet, or when the entry already stored there has more wildcard bits
        /// than this pattern.
        /// </remarks>
        /// <param name="encoding">Bit pattern string, most significant bit first</param>
        /// <param name="name">Instruction name stored in the table entry</param>
        /// <param name="emitter">Emitter stored in the table entry</param>
        /// <param name="props">Instruction properties stored in the table entry</param>
        private static void Add(string encoding, InstName name, InstEmitter emitter, InstProps props = InstProps.None)
        {
            ReadOnlySpan<char> pattern = encoding.AsSpan(0, EncodingBits);
            int patternLength = pattern.Length;

            int fixedBits = 0;
            int wildcardMask = 0;
            int wildcardCount = 0;

            // Bit position of each wildcard, in the order they appear in the pattern.
            int[] wildcardPositions = new int[patternLength];

            for (int i = 0; i < patternLength; i++)
            {
                // The first character of the pattern maps to the highest bit.
                int bitPos = patternLength - 1 - i;

                switch (pattern[i])
                {
                    case '1':
                        fixedBits |= 1 << bitPos;
                        break;

                    case 'x':
                        wildcardMask |= 1 << bitPos;
                        wildcardPositions[wildcardCount++] = bitPos;
                        break;
                }
            }

            int keepMask = ~wildcardMask;
            int combinations = 1 << wildcardCount;

            TableEntry entry = new TableEntry(name, emitter, props, wildcardCount);

            for (int combo = 0; combo < combinations; combo++)
            {
                // Start from the fixed bits, then scatter the bits of "combo" into the wildcard positions.
                int slot = fixedBits & keepMask;

                for (int w = 0; w < wildcardCount; w++)
                {
                    slot |= ((combo >> w) & 1) << wildcardPositions[w];
                }

                ref TableEntry existing = ref _opCodes[slot];

                // Fewer wildcard bits wins over an entry that was registered with more of them.
                if (existing.Emitter == null || existing.XBits > wildcardCount)
                {
                    existing = entry;
                }
            }
        }

        /// <summary>
        /// Builds the decoded operation for an instruction word by looking up its top
        /// <c>EncodingBits</c> bits in the opcode table.
        /// </summary>
        /// <param name="address">Address passed through to the returned operation</param>
        /// <param name="opCode">64-bit instruction word</param>
        /// <returns>
        /// An operation carrying the matching table entry's name, emitter and properties, or one with
        /// <see cref="InstName.Invalid"/>, a null emitter and <see cref="InstProps.None"/> when the slot has no emitter
        /// </returns>
        public static InstOp GetOp(ulong address, ulong opCode)
        {
            ref TableEntry entry = ref _opCodes[opCode >> (64 - EncodingBits)];

            return entry.Emitter == null
                ? new InstOp(address, opCode, InstName.Invalid, null, InstProps.None)
                : new InstOp(address, opCode, entry.Name, entry.Emitter, entry.Props);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Decoders/Register.cs
using System;

namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Immutable pair of a register index and a register type.
    /// Two registers are equal when both the index and the type match.
    /// </summary>
    readonly struct Register : IEquatable<Register>
    {
        /// <summary>
        /// Index of the register within its type.
        /// </summary>
        public int Index { get; }

        /// <summary>
        /// Type of the register.
        /// </summary>
        public RegisterType Type { get; }

        /// <summary>
        /// True for a <see cref="RegisterType.Gpr"/> register whose index is <see cref="RegisterConsts.RegisterZeroIndex"/>.
        /// </summary>
        public bool IsRZ => Index == RegisterConsts.RegisterZeroIndex && Type == RegisterType.Gpr;

        /// <summary>
        /// True for a <see cref="RegisterType.Predicate"/> register whose index is <see cref="RegisterConsts.PredicateTrueIndex"/>.
        /// </summary>
        public bool IsPT => Index == RegisterConsts.PredicateTrueIndex && Type == RegisterType.Predicate;

        /// <summary>
        /// Creates a register from its index and type.
        /// </summary>
        /// <param name="index">Register index</param>
        /// <param name="type">Register type</param>
        public Register(int index, RegisterType type)
        {
            Type = type;
            Index = index;
        }

        /// <summary>
        /// Packs the low 16 bits of the index into bits 0-15 and the low 16 bits of the type into bits 16-31.
        /// </summary>
        /// <returns>Hash code of the register</returns>
        public override int GetHashCode() => ((ushort)Type << 16) | (ushort)Index;

        /// <summary>
        /// Compares against an arbitrary object; only another <see cref="Register"/> can be equal.
        /// </summary>
        /// <param name="obj">Object to compare with</param>
        /// <returns>True if <paramref name="obj"/> is a register with the same index and type</returns>
        public override bool Equals(object obj)
        {
            if (obj is Register candidate)
            {
                return Equals(candidate);
            }

            return false;
        }

        /// <summary>
        /// Compares index and type with another register.
        /// </summary>
        /// <param name="other">Register to compare with</param>
        /// <returns>True if both index and type are the same</returns>
        public bool Equals(Register other)
        {
            return Index == other.Index && Type == other.Type;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Decoders/RegisterConsts.cs
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Register counts and the special register indices derived from them.
    /// </summary>
    static class RegisterConsts
    {
        /// <summary>
        /// Count of Gpr registers.
        /// </summary>
        public const int GprsCount = 255;

        /// <summary>
        /// Gpr index used as the "register zero" index; equal to <see cref="GprsCount"/>.
        /// </summary>
        public const int RegisterZeroIndex = GprsCount;

        /// <summary>
        /// Count of predicate registers.
        /// </summary>
        public const int PredsCount = 7;

        /// <summary>
        /// Predicate index used as the "predicate true" index; equal to <see cref="PredsCount"/>.
        /// </summary>
        public const int PredicateTrueIndex = PredsCount;

        /// <summary>
        /// Count of flag registers.
        /// </summary>
        public const int FlagsCount = 4;

        /// <summary>
        /// Sum of the Gpr, predicate and flag register counts.
        /// </summary>
        public const int TotalCount = GprsCount + PredsCount + FlagsCount;
    }
}
// File: src/Ryujinx.Graphics.Shader/Decoders/RegisterType.cs
namespace Ryujinx.Graphics.Shader.Decoders
{
    /// <summary>
    /// Kind of register a <see cref="Register"/> refers to.
    /// </summary>
    enum RegisterType
    {
        /// <summary>
        /// Flag register.
        /// </summary>
        Flag = 0,

        /// <summary>
        /// General purpose register.
        /// </summary>
        Gpr = 1,

        /// <summary>
        /// Predicate register.
        /// </summary>
        Predicate = 2,
    }
}
// File: src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
using System;

namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// GPU state access interface.
    /// </summary>
    /// <remarks>
    /// Every member except <see cref="GetCode(ulong, int)"/> has a default implementation,
    /// so an implementation only has to override the queries it can answer.
    /// </remarks>
    public interface IGpuAccessor
    {
        /// <summary>
        /// Prints a log message.
        /// </summary>
        /// <param name="message">Message to print</param>
        void Log(string message)
        {
            // Intentionally empty, there is no log output by default.
        }

        /// <summary>
        /// Reads data from constant buffer 1.
        /// </summary>
        /// <param name="offset">Offset in bytes to read from</param>
        /// <returns>Value at the given offset (0 by default)</returns>
        uint ConstantBuffer1Read(int offset) => 0;

        /// <summary>
        /// Gets a span of the specified memory location, containing shader code.
        /// </summary>
        /// <param name="address">GPU virtual address of the data</param>
        /// <param name="minimumSize">Minimum size that the returned span may have</param>
        /// <returns>Span of the memory location</returns>
        ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize);

        /// <summary>
        /// Queries the alpha test comparison operator that is currently in use.
        /// If alpha test is disabled, it should be set to <see cref="AlphaTestOp.Always"/>.
        /// </summary>
        /// <returns>Current alpha test comparison</returns>
        AlphaTestOp QueryAlphaTestCompare() => AlphaTestOp.Always;

        /// <summary>
        /// Queries the current alpha test reference value used by the comparison.
        /// </summary>
        /// <returns>Current alpha test reference value</returns>
        float QueryAlphaTestReference() => 0f;

        /// <summary>
        /// Queries the type of the vertex shader input attribute at the specified <paramref name="location"/>.
        /// </summary>
        /// <param name="location">Location of the input attribute</param>
        /// <returns>Input type</returns>
        AttributeType QueryAttributeType(int location) => AttributeType.Float;

        /// <summary>
        /// Queries whether the alpha-to-coverage dithering feature is enabled.
        /// </summary>
        /// <returns>True if the feature is enabled, false otherwise</returns>
        bool QueryAlphaToCoverageDitherEnable() => false;

        /// <summary>
        /// Queries the binding number of a constant buffer.
        /// </summary>
        /// <param name="index">Constant buffer index</param>
        /// <returns>Binding number (the index itself by default)</returns>
        int QueryBindingConstantBuffer(int index) => index;

        /// <summary>
        /// Queries the binding number of a storage buffer.
        /// </summary>
        /// <param name="index">Storage buffer index</param>
        /// <returns>Binding number (the index itself by default)</returns>
        int QueryBindingStorageBuffer(int index) => index;

        /// <summary>
        /// Queries the binding number of a texture.
        /// </summary>
        /// <param name="index">Texture index</param>
        /// <param name="isBuffer">Indicates if the texture is a buffer texture</param>
        /// <returns>Binding number (the index itself by default)</returns>
        int QueryBindingTexture(int index, bool isBuffer) => index;

        /// <summary>
        /// Queries the binding number of an image.
        /// </summary>
        /// <param name="index">Image index</param>
        /// <param name="isBuffer">Indicates if the image is a buffer image</param>
        /// <returns>Binding number (the index itself by default)</returns>
        int QueryBindingImage(int index, bool isBuffer) => index;

        /// <summary>
        /// Queries the output type for fragment shaders.
        /// </summary>
        /// <param name="location">Location of the fragment output</param>
        /// <returns>Output type</returns>
        AttributeType QueryFragmentOutputType(int location) => AttributeType.Float;

        /// <summary>
        /// Queries Local Size X for compute shaders.
        /// </summary>
        /// <returns>Local Size X</returns>
        int QueryComputeLocalSizeX() => 1;

        /// <summary>
        /// Queries Local Size Y for compute shaders.
        /// </summary>
        /// <returns>Local Size Y</returns>
        int QueryComputeLocalSizeY() => 1;

        /// <summary>
        /// Queries Local Size Z for compute shaders.
        /// </summary>
        /// <returns>Local Size Z</returns>
        int QueryComputeLocalSizeZ() => 1;

        /// <summary>
        /// Queries Local Memory size in bytes for compute shaders.
        /// </summary>
        /// <returns>Local Memory size in bytes</returns>
        int QueryComputeLocalMemorySize() => 0x1000;

        /// <summary>
        /// Queries Shared Memory size in bytes for compute shaders.
        /// </summary>
        /// <returns>Shared Memory size in bytes</returns>
        int QueryComputeSharedMemorySize() => 0xc000;

        /// <summary>
        /// Queries Constant Buffer usage information.
        /// </summary>
        /// <returns>A mask where each bit set indicates a bound constant buffer</returns>
        uint QueryConstantBufferUse() => 0;

        /// <summary>
        /// Queries whether the current draw has written the base vertex and base instance into Constant Buffer 0.
        /// </summary>
        /// <returns>True if the shader translator can assume that the constant buffer contains the base IDs, false otherwise</returns>
        bool QueryHasConstantBufferDrawParameters() => false;

        /// <summary>
        /// Queries whether the current draw uses unaligned storage buffer addresses.
        /// </summary>
        /// <returns>True if any storage buffer address is not aligned to 16 bytes, false otherwise</returns>
        bool QueryHasUnalignedStorageBuffer() => false;

        /// <summary>
        /// Queries host's gather operation precision bits for biasing their coordinates. Zero means no bias.
        /// </summary>
        /// <returns>Bits of gather operation precision to use for coordinate bias</returns>
        int QueryHostGatherBiasPrecision() => 0;

        /// <summary>
        /// Queries host about whether to reduce precision to improve performance.
        /// </summary>
        /// <returns>True if precision is limited to vertex position, false otherwise</returns>
        bool QueryHostReducedPrecision() => false;

        /// <summary>
        /// Queries dual source blend state.
        /// </summary>
        /// <returns>True if blending is enabled with a dual source blend equation, false otherwise</returns>
        bool QueryDualSourceBlendEnable() => false;

        /// <summary>
        /// Queries host about the presence of the FrontFacing built-in variable bug.
        /// </summary>
        /// <returns>True if the bug is present on the host device used, false otherwise</returns>
        bool QueryHostHasFrontFacingBug() => false;

        /// <summary>
        /// Queries host about the presence of the vector indexing bug.
        /// </summary>
        /// <returns>True if the bug is present on the host device used, false otherwise</returns>
        bool QueryHostHasVectorIndexingBug() => false;

        /// <summary>
        /// Queries the storage buffer offset alignment required by the host.
        /// </summary>
        /// <returns>Host storage buffer alignment in bytes</returns>
        int QueryHostStorageBufferOffsetAlignment() => 16;

        /// <summary>
        /// Queries host support for texture formats with BGRA component order (such as BGRA8).
        /// </summary>
        /// <returns>True if BGRA formats are supported, false otherwise</returns>
        bool QueryHostSupportsBgraFormat() => true;

        /// <summary>
        /// Queries host support for fragment shader ordering critical sections on the shader code.
        /// </summary>
        /// <returns>True if fragment shader interlock is supported, false otherwise</returns>
        bool QueryHostSupportsFragmentShaderInterlock() => true;

        /// <summary>
        /// Queries host support for fragment shader ordering scoped critical sections on the shader code.
        /// </summary>
        /// <returns>True if fragment shader ordering is supported, false otherwise</returns>
        bool QueryHostSupportsFragmentShaderOrderingIntel() => false;

        /// <summary>
        /// Queries host GPU geometry shader support.
        /// </summary>
        /// <returns>True if the GPU and driver support geometry shaders, false otherwise</returns>
        bool QueryHostSupportsGeometryShader() => true;

        /// <summary>
        /// Queries host GPU geometry shader passthrough support.
        /// </summary>
        /// <returns>True if the GPU and driver support geometry shader passthrough, false otherwise</returns>
        bool QueryHostSupportsGeometryShaderPassthrough() => true;

        /// <summary>
        /// Queries host support for readable images without an explicit format declaration on the shader.
        /// </summary>
        /// <returns>True if formatted image load is supported, false otherwise</returns>
        bool QueryHostSupportsImageLoadFormatted() => true;

        /// <summary>
        /// Queries host support for writes to the layer from vertex or tessellation shader stages.
        /// </summary>
        /// <returns>True if writes to the layer from vertex or tessellation are supported, false otherwise</returns>
        bool QueryHostSupportsLayerVertexTessellation() => true;

        /// <summary>
        /// Queries host GPU non-constant texture offset support.
        /// </summary>
        /// <returns>True if the GPU and driver support non-constant texture offsets, false otherwise</returns>
        bool QueryHostSupportsNonConstantTextureOffset() => true;

        /// <summary>
        /// Queries host GPU shader ballot support.
        /// </summary>
        /// <returns>True if the GPU and driver support shader ballot, false otherwise</returns>
        bool QueryHostSupportsShaderBallot() => true;

        /// <summary>
        /// Queries host GPU support for signed normalized buffer texture formats.
        /// </summary>
        /// <returns>True if the GPU and driver support the formats, false otherwise</returns>
        bool QueryHostSupportsSnormBufferTextureFormat() => true;

        /// <summary>
        /// Queries host GPU texture shadow LOD support.
        /// </summary>
        /// <returns>True if the GPU and driver support texture shadow LOD, false otherwise</returns>
        bool QueryHostSupportsTextureShadowLod() => true;

        /// <summary>
        /// Queries host support for writes to the viewport index from vertex or tessellation shader stages.
        /// </summary>
        /// <returns>True if writes to the viewport index from vertex or tessellation are supported, false otherwise</returns>
        bool QueryHostSupportsViewportIndexVertexTessellation() => true;

        /// <summary>
        /// Queries host GPU shader viewport mask output support.
        /// </summary>
        /// <returns>True if the GPU and driver support shader viewport mask output, false otherwise</returns>
        bool QueryHostSupportsViewportMask() => true;

        /// <summary>
        /// Queries the point size from the GPU state, used when it is not explicitly set on the shader.
        /// </summary>
        /// <returns>Current point size</returns>
        float QueryPointSize() => 1f;

        /// <summary>
        /// Queries the state that indicates if the program point size should be explicitly set on the shader
        /// or read from the GPU state.
        /// </summary>
        /// <returns>True if the shader is expected to set the point size explicitly, false otherwise</returns>
        bool QueryProgramPointSize() => true;

        /// <summary>
        /// Queries sampler type information.
        /// </summary>
        /// <param name="handle">Texture handle</param>
        /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
        /// <returns>The sampler type value for the given handle</returns>
        SamplerType QuerySamplerType(int handle, int cbufSlot = -1) => SamplerType.Texture2D;

        /// <summary>
        /// Queries texture coordinate normalization information.
        /// </summary>
        /// <param name="handle">Texture handle</param>
        /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
        /// <returns>True if the coordinates are normalized, false otherwise</returns>
        bool QueryTextureCoordNormalized(int handle, int cbufSlot = -1) => true;

        /// <summary>
        /// Queries current primitive topology for geometry shaders.
        /// </summary>
        /// <returns>Current primitive topology</returns>
        InputTopology QueryPrimitiveTopology() => InputTopology.Points;

        /// <summary>
        /// Queries the tessellation evaluation shader primitive winding order.
        /// </summary>
        /// <returns>True if the primitive winding order is clockwise, false if counter-clockwise</returns>
        bool QueryTessCw() => false;

        /// <summary>
        /// Queries the tessellation evaluation shader abstract patch type.
        /// </summary>
        /// <returns>Abstract patch type</returns>
        TessPatchType QueryTessPatchType() => TessPatchType.Triangles;

        /// <summary>
        /// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch.
        /// </summary>
        /// <returns>Spacing between tessellated vertices of the patch</returns>
        TessSpacing QueryTessSpacing() => TessSpacing.EqualSpacing;

        /// <summary>
        /// Queries texture format information, for shaders using image load or store.
        /// </summary>
        /// <remarks>
        /// This only returns non-compressed color formats.
        /// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned.
        /// </remarks>
        /// <param name="handle">Texture handle</param>
        /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
        /// <returns>Color format of the non-compressed texture</returns>
        TextureFormat QueryTextureFormat(int handle, int cbufSlot = -1) => TextureFormat.R8G8B8A8Unorm;

        /// <summary>
        /// Queries depth mode information from the GPU state.
        /// </summary>
        /// <returns>True if current depth mode is -1 to 1, false if 0 to 1</returns>
        bool QueryTransformDepthMinusOneToOne() => false;

        /// <summary>
        /// Queries transform feedback enable state.
        /// </summary>
        /// <returns>True if the shader uses transform feedback, false otherwise</returns>
        bool QueryTransformFeedbackEnabled() => false;

        /// <summary>
        /// Queries the varying locations that should be written to the transform feedback buffer.
        /// </summary>
        /// <param name="bufferIndex">Index of the transform feedback buffer</param>
        /// <returns>Varying locations for the specified buffer (empty by default)</returns>
        ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex) => ReadOnlySpan<byte>.Empty;

        /// <summary>
        /// Queries the stride (in bytes) of the per vertex data written into the transform feedback buffer.
        /// </summary>
        /// <param name="bufferIndex">Index of the transform feedback buffer</param>
        /// <returns>Stride for the specified buffer</returns>
        int QueryTransformFeedbackStride(int bufferIndex) => 0;

        /// <summary>
        /// Queries if host state forces early depth testing.
        /// </summary>
        /// <returns>True if early depth testing is forced</returns>
        bool QueryEarlyZForce() => false;

        /// <summary>
        /// Queries if host state disables the viewport transform.
        /// </summary>
        /// <returns>True if the viewport transform is disabled</returns>
        bool QueryViewportTransformDisable() => false;

        /// <summary>
        /// Registers a texture used by the shader.
        /// </summary>
        /// <param name="handle">Texture handle word offset</param>
        /// <param name="cbufSlot">Constant buffer slot where the texture handle is located</param>
        void RegisterTexture(int handle, int cbufSlot)
        {
            // Intentionally empty, only useful when recording information for a disk shader cache.
        }
    }
}

// File: src/Ryujinx.Graphics.Shader/InputTopology.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Primitive topology values, as returned by <see cref="IGpuAccessor.QueryPrimitiveTopology"/>.
    /// </summary>
    public enum InputTopology : byte
    {
        Points,
        Lines,
        LinesAdjacency,
        Triangles,
        TrianglesAdjacency
    }

    /// <summary>
    /// Conversion helpers for <see cref="InputTopology"/>.
    /// </summary>
    static class InputTopologyExtensions
    {
        /// <summary>
        /// Gets the lowercase, underscore separated name of the topology.
        /// </summary>
        /// <param name="topology">Topology to convert</param>
        /// <returns>Name of the topology, or "points" for any value that is not a named member</returns>
        public static string ToGlslString(this InputTopology topology)
        {
            switch (topology)
            {
                case InputTopology.Lines:
                    return "lines";
                case InputTopology.LinesAdjacency:
                    return "lines_adjacency";
                case InputTopology.Triangles:
                    return "triangles";
                case InputTopology.TrianglesAdjacency:
                    return "triangles_adjacency";
                case InputTopology.Points:
                default:
                    return "points";
            }
        }

        /// <summary>
        /// Gets the vertex count associated with the topology.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the adjacency variants report the same count as their base primitive (2 and 3);
        /// confirm callers do not expect the adjacency vertices to be included.
        /// </remarks>
        /// <param name="topology">Topology to query</param>
        /// <returns>1 for points, 2 for line topologies, 3 for triangle topologies, 1 for any other value</returns>
        public static int ToInputVertices(this InputTopology topology)
        {
            switch (topology)
            {
                case InputTopology.Lines:
                case InputTopology.LinesAdjacency:
                    return 2;
                case InputTopology.Triangles:
                case InputTopology.TrianglesAdjacency:
                    return 3;
                case InputTopology.Points:
                default:
                    return 1;
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Maps attribute byte offsets to <see cref="IoVariable"/>s and emits the loads and stores that access them.
    /// </summary>
    static class AttributeMap
    {
        /// <summary>
        /// Bit mask of shader stages, with one bit per <see cref="ShaderStage"/> value (bit index = stage value).
        /// </summary>
        private enum StagesMask : byte
        {
            None = 0,
            Compute = 1 << (int)ShaderStage.Compute,
            Vertex = 1 << (int)ShaderStage.Vertex,
            TessellationControl = 1 << (int)ShaderStage.TessellationControl,
            TessellationEvaluation = 1 << (int)ShaderStage.TessellationEvaluation,
            Geometry = 1 << (int)ShaderStage.Geometry,
            Fragment = 1 << (int)ShaderStage.Fragment,

            // Combinations used by the tables below.
            Tessellation = TessellationControl | TessellationEvaluation,
            VertexTessellationGeometry = Vertex | Tessellation | Geometry,
            TessellationGeometryFragment = Tessellation | Geometry | Fragment,
            AllGraphics = Vertex | Tessellation | Geometry | Fragment
        }

        /// <summary>
        /// Table entry describing the variable that a given attribute offset belongs to.
        /// </summary>
        private struct AttributeEntry
        {
            /// <summary>Offset of the first element of the run this entry was registered with.</summary>
            public int BaseOffset { get; }

            /// <summary>Type the variable was registered with.</summary>
            public AggregateType Type { get; }

            /// <summary>Variable that the offset maps to.</summary>
            public IoVariable IoVariable { get; }

            /// <summary>Stages where the variable can be accessed as an input.</summary>
            public StagesMask InputMask { get; }

            /// <summary>Stages where the variable can be accessed as an output.</summary>
            public StagesMask OutputMask { get; }

            public AttributeEntry(
                int baseOffset,
                AggregateType type,
                IoVariable ioVariable,
                StagesMask inputMask,
                StagesMask outputMask)
            {
                BaseOffset = baseOffset;
                Type = type;
                IoVariable = ioVariable;
                InputMask = inputMask;
                OutputMask = outputMask;
            }
        }

        // Offset -> entry tables, one for regular attributes and one for per-patch attributes.
        private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributes;
        private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributesPerPatch;

        static AttributeMap()
        {
            _attributes = CreateMap();
            _attributesPerPatch = CreatePerPatchMap();
        }

        /// <summary>
        /// Builds the table of regular (not per-patch) attributes.
        /// </summary>
        /// <returns>Table keyed by attribute byte offset</returns>
        private static IReadOnlyDictionary<int, AttributeEntry> CreateMap()
        {
            var map = new Dictionary<int, AttributeEntry>();

            // Arguments: offset, type, variable, stages that may read it, stages that may write it, [count].
            Add(map, 0x060, AggregateType.S32, IoVariable.PrimitiveId, StagesMask.TessellationGeometryFragment, StagesMask.Geometry);
            Add(map, 0x064, AggregateType.S32, IoVariable.Layer, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x068, AggregateType.S32, IoVariable.ViewportIndex, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x06c, AggregateType.FP32, IoVariable.PointSize, StagesMask.None, StagesMask.VertexTessellationGeometry);
            Add(map, 0x070, AggregateType.Vector4 | AggregateType.FP32, IoVariable.Position, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x080, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.AllGraphics, StagesMask.VertexTessellationGeometry, 32);
            Add(map, 0x280, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x290, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x2a0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x2b0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x2c0, AggregateType.Array | AggregateType.FP32, IoVariable.ClipDistance, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry, 8);
            Add(map, 0x2e0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.PointCoord, StagesMask.Fragment, StagesMask.None);
            Add(map, 0x2e8, AggregateType.FP32, IoVariable.FogCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x2f0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationCoord, StagesMask.TessellationEvaluation, StagesMask.None);
            Add(map, 0x2f8, AggregateType.S32, IoVariable.InstanceId, StagesMask.Vertex, StagesMask.None);
            Add(map, 0x2fc, AggregateType.S32, IoVariable.VertexId, StagesMask.Vertex, StagesMask.None);
            Add(map, 0x300, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TextureCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x3a0, AggregateType.Array | AggregateType.S32, IoVariable.ViewportMask, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x3fc, AggregateType.Bool, IoVariable.FrontFacing, StagesMask.Fragment, StagesMask.None);

            return map;
        }

        /// <summary>
        /// Builds the table of per-patch attributes.
        /// </summary>
        /// <returns>Table keyed by per-patch attribute byte offset</returns>
        private static IReadOnlyDictionary<int, AttributeEntry> CreatePerPatchMap()
        {
            var map = new Dictionary<int, AttributeEntry>();

            Add(map, 0x000, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TessellationLevelOuter, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
            Add(map, 0x010, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationLevelInner, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
            // The user defined run is cut short by the 0x200 upper bound passed as the last argument.
            Add(map, 0x018, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.TessellationEvaluation, StagesMask.TessellationControl, 31, 0x200);

            return map;
        }

        /// <summary>
        /// Registers a run of <paramref name="count"/> variables starting at <paramref name="offset"/>.
        /// One table entry is added for every 4 byte element, and all of them share the same base offset.
        /// </summary>
        /// <param name="attributes">Table to add the entries to</param>
        /// <param name="offset">Byte offset of the first element</param>
        /// <param name="type">Type of each variable, its vector size defines how many elements each one has</param>
        /// <param name="ioVariable">Variable that the offsets map to</param>
        /// <param name="inputMask">Stages where the variable is a valid input</param>
        /// <param name="outputMask">Stages where the variable is a valid output</param>
        /// <param name="count">Number of consecutive variables to register</param>
        /// <param name="upperBound">Offset at which registration stops, no entry is added at or past it</param>
        private static void Add(
            Dictionary<int, AttributeEntry> attributes,
            int offset,
            AggregateType type,
            IoVariable ioVariable,
            StagesMask inputMask,
            StagesMask outputMask,
            int count = 1,
            int upperBound = 0x400)
        {
            int baseOffset = offset;

            int elementsCount = GetElementCount(type);

            for (int index = 0; index < count; index++)
            {
                for (int elementIndex = 0; elementIndex < elementsCount; elementIndex++)
                {
                    attributes.Add(offset, new AttributeEntry(baseOffset, type, ioVariable, inputMask, outputMask));

                    offset += 4;

                    // Stop as soon as the next element would land at or past the upper bound.
                    if (offset >= upperBound)
                    {
                        return;
                    }
                }
            }
        }

        /// <summary>
        /// Emits a load of the attribute at the specified offset.
        /// </summary>
        /// <remarks>
        /// A message is logged and <c>Const(0)</c> is returned when the offset is not on the table,
        /// when the variable can't be used that way on the current stage, or when the host does not support it.
        /// </remarks>
        /// <param name="context">Emitter context used to emit the load</param>
        /// <param name="primVertex">Vertex index operand, may be null. NOTE(review): presumably the vertex index inside the input primitive, confirm against callers</param>
        /// <param name="offset">Attribute byte offset</param>
        /// <param name="isOutput">True to read back an output, false to read an input</param>
        /// <param name="isPerPatch">True to use the per-patch table and storage kinds</param>
        /// <returns>Operand with the loaded value, or a constant zero on failure</returns>
        public static Operand GenerateAttributeLoad(EmitterContext context, Operand primVertex, int offset, bool isOutput, bool isPerPatch)
        {
            if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
                return Const(0);
            }

            StagesMask validUseMask = isOutput ? entry.OutputMask : entry.InputMask;

            if (((StagesMask)(1 << (int)context.Config.Stage) & validUseMask) == StagesMask.None)
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
                return Const(0);
            }

            if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
                return Const(0);
            }

            // When HasInvocationId is true (tessellation control outputs), the caller supplied
            // vertex index is replaced by the invocation ID.
            if (HasInvocationId(context.Config.Stage, isOutput) && !isPerPatch)
            {
                primVertex = context.Load(StorageKind.Input, IoVariable.InvocationId);
            }

            // Index, in 4 byte elements, relative to the start of the run the offset belongs to.
            int innerOffset = offset - entry.BaseOffset;
            int innerIndex = innerOffset / 4;

            StorageKind storageKind = isPerPatch
                ? (isOutput ? StorageKind.OutputPerPatch : StorageKind.InputPerPatch)
                : (isOutput ? StorageKind.Output : StorageKind.Input);
            IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
            AggregateType type = GetType(context.Config, isOutput, innerIndex, in entry);
            int elementCount = GetElementCount(type);

            bool isArray = type.HasFlag(AggregateType.Array);
            bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput);

            bool hasElementIndex = elementCount > 1;

            if (hasArrayIndex && hasElementIndex)
            {
                int arrayIndex = innerIndex / elementCount;
                int elementIndex = innerIndex - (arrayIndex * elementCount);

                // Both forms pass the same operands, they only differ on whether the array index
                // comes after or before the vertex index.
                return primVertex == null || isArray
                    ? context.Load(storageKind, ioVariable, primVertex, Const(arrayIndex), Const(elementIndex))
                    : context.Load(storageKind, ioVariable, Const(arrayIndex), primVertex, Const(elementIndex));
            }
            else if (hasArrayIndex || hasElementIndex)
            {
                // Here the inner index is either the array index or the element index, never both.
                return primVertex == null || isArray || !hasArrayIndex
                    ? context.Load(storageKind, ioVariable, primVertex, Const(innerIndex))
                    : context.Load(storageKind, ioVariable, Const(innerIndex), primVertex);
            }
            else
            {
                return context.Load(storageKind, ioVariable, primVertex);
            }
        }

        /// <summary>
        /// Emits a store to the output attribute at the specified offset.
        /// </summary>
        /// <remarks>
        /// A message is logged and nothing is emitted when the offset is not on the table,
        /// when the variable is not an output of the current stage, or when the host does not support it.
        /// </remarks>
        /// <param name="context">Emitter context used to emit the store</param>
        /// <param name="offset">Attribute byte offset</param>
        /// <param name="isPerPatch">True to use the per-patch table and storage kind</param>
        /// <param name="value">Value to be stored</param>
        public static void GenerateAttributeStore(EmitterContext context, int offset, bool isPerPatch, Operand value)
        {
            if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
                return;
            }

            if (((StagesMask)(1 << (int)context.Config.Stage) & entry.OutputMask) == StagesMask.None)
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
                return;
            }

            if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
                return;
            }

            // Stays null unless HasInvocationId is true for the stage (tessellation control outputs).
            Operand invocationId = null;

            if (HasInvocationId(context.Config.Stage, isOutput: true) && !isPerPatch)
            {
                invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
            }

            // Index, in 4 byte elements, relative to the start of the run the offset belongs to.
            int innerOffset = offset - entry.BaseOffset;
            int innerIndex = innerOffset / 4;

            StorageKind storageKind = isPerPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
            IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
            AggregateType type = GetType(context.Config, isOutput: true, innerIndex, in entry);
            int elementCount = GetElementCount(type);

            bool isArray = type.HasFlag(AggregateType.Array);
            bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput: true);

            bool hasElementIndex = elementCount > 1;

            if (hasArrayIndex && hasElementIndex)
            {
                int arrayIndex = innerIndex / elementCount;
                int elementIndex = innerIndex - (arrayIndex * elementCount);

                // Both forms pass the same operands, they only differ on whether the array index
                // comes after or before the invocation ID.
                if (invocationId == null || isArray)
                {
                    context.Store(storageKind, ioVariable, invocationId, Const(arrayIndex), Const(elementIndex), value);
                }
                else
                {
                    context.Store(storageKind, ioVariable, Const(arrayIndex), invocationId, Const(elementIndex), value);
                }
            }
            else if (hasArrayIndex || hasElementIndex)
            {
                // Here the inner index is either the array index or the element index, never both.
                if (invocationId == null || isArray || !hasArrayIndex)
                {
                    context.Store(storageKind, ioVariable, invocationId, Const(innerIndex), value);
                }
                else
                {
                    context.Store(storageKind, ioVariable, Const(innerIndex), invocationId, value);
                }
            }
            else
            {
                context.Store(storageKind, ioVariable, invocationId, value);
            }
        }

        /// <summary>
        /// Checks with the GPU accessor if the host can access the variable on the specified stage.
        /// Only the viewport index (outside geometry and fragment stages) and the viewport mask are queried.
        /// </summary>
        /// <param name="gpuAccessor">GPU accessor used to query host capabilities</param>
        /// <param name="stage">Current shader stage</param>
        /// <param name="ioVariable">Variable being accessed</param>
        /// <returns>True if the variable can be used, false otherwise</returns>
        private static bool IsSupportedByHost(IGpuAccessor gpuAccessor, ShaderStage stage, IoVariable ioVariable)
        {
            if (ioVariable == IoVariable.ViewportIndex && stage != ShaderStage.Geometry && stage != ShaderStage.Fragment)
            {
                return gpuAccessor.QueryHostSupportsViewportIndexVertexTessellation();
            }
            else if (ioVariable == IoVariable.ViewportMask)
            {
                return gpuAccessor.QueryHostSupportsViewportMask();
            }

            return true;
        }

        /// <summary>
        /// Gets the output variable that a regular (not per-patch) attribute offset maps to on the current stage.
        /// </summary>
        /// <param name="config">Shader configuration, provides the current stage</param>
        /// <param name="offset">Attribute byte offset</param>
        /// <param name="location">Location of the variable (offset from the base divided by 16) for per-location outputs, zero otherwise</param>
        /// <returns>The variable, or <see cref="IoVariable.Invalid"/> if the offset is unknown or not an output of the stage</returns>
        public static IoVariable GetIoVariable(ShaderConfig config, int offset, out int location)
        {
            location = 0;

            if (!_attributes.TryGetValue(offset, out AttributeEntry entry))
            {
                return IoVariable.Invalid;
            }

            if (((StagesMask)(1 << (int)config.Stage) & entry.OutputMask) == StagesMask.None)
            {
                return IoVariable.Invalid;
            }

            if (config.HasPerLocationInputOrOutput(entry.IoVariable, isOutput: true))
            {
                location = (offset - entry.BaseOffset) / 16;
            }

            return GetIoVariable(config.Stage, in entry);
        }

        /// <summary>
        /// Gets the variable of an entry, replacing the position by the fragment coordinate on the fragment stage.
        /// </summary>
        /// <param name="stage">Current shader stage</param>
        /// <param name="entry">Table entry</param>
        /// <returns>Variable to be used on the specified stage</returns>
        private static IoVariable GetIoVariable(ShaderStage stage, in AttributeEntry entry)
        {
            if (entry.IoVariable == IoVariable.Position && stage == ShaderStage.Fragment)
            {
                return IoVariable.FragmentCoord;
            }

            return entry.IoVariable;
        }

        /// <summary>
        /// Gets the type of the variable of an entry. User defined variables and fragment output colors
        /// have their type queried from the shader configuration, using the inner index divided by 4
        /// as the location, all others use the type that the entry was registered with.
        /// </summary>
        /// <param name="config">Shader configuration</param>
        /// <param name="isOutput">True if the variable is an output, false if it is an input</param>
        /// <param name="innerIndex">Index, in 4 byte elements, from the base offset of the entry</param>
        /// <param name="entry">Table entry</param>
        /// <returns>Type of the variable</returns>
        private static AggregateType GetType(ShaderConfig config, bool isOutput, int innerIndex, in AttributeEntry entry)
        {
            AggregateType type = entry.Type;

            if (entry.IoVariable == IoVariable.UserDefined)
            {
                type = config.GetUserDefinedType(innerIndex / 4, isOutput);
            }
            else if (entry.IoVariable == IoVariable.FragmentOutputColor)
            {
                type = config.GetFragmentOutputColorType(innerIndex / 4);
            }

            return type;
        }

        /// <summary>
        /// Indicates if input accesses on the specified stage are true for tessellation and geometry stages only;
        /// output accesses always return false.
        /// </summary>
        /// <param name="stage">Shader stage</param>
        /// <param name="isOutput">True for outputs, false for inputs</param>
        /// <returns>True for inputs of tessellation control, tessellation evaluation and geometry stages, false otherwise</returns>
        public static bool HasPrimitiveVertex(ShaderStage stage, bool isOutput)
        {
            if (isOutput)
            {
                return false;
            }

            return stage == ShaderStage.TessellationControl ||
                   stage == ShaderStage.TessellationEvaluation ||
                   stage == ShaderStage.Geometry;
        }

        /// <summary>
        /// Indicates if accesses are indexed by the invocation ID, which is the case only for tessellation control outputs.
        /// </summary>
        /// <param name="stage">Shader stage</param>
        /// <param name="isOutput">True for outputs, false for inputs</param>
        /// <returns>True for tessellation control outputs, false otherwise</returns>
        public static bool HasInvocationId(ShaderStage stage, bool isOutput)
        {
            return isOutput && stage == ShaderStage.TessellationControl;
        }

        /// <summary>
        /// Gets the number of elements of a type, from its vector size bits.
        /// </summary>
        /// <param name="type">Type to get the element count from</param>
        /// <returns>2, 3 or 4 for vector types, 1 for everything else</returns>
        private static int GetElementCount(AggregateType type)
        {
            return (type & AggregateType.ElementCountMask) switch
            {
                AggregateType.Vector2 => 2,
                AggregateType.Vector3 => 3,
                AggregateType.Vector4 => 4,
                _ => 1
            };
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for shader instructions that are not translated yet.
    /// Each emitter fetches the decoded operation (the result is discarded, the call is
    /// kept so the sequence of calls made on the context is unchanged) and then reports
    /// through the GPU accessor log that the instruction is not implemented.
    /// </summary>
    static partial class InstEmit
    {
        public static void AtomCas(EmitterContext context)
        {
            _ = context.GetOp<InstAtomCas>();

            LogNotImplemented(context, nameof(AtomCas));
        }

        public static void AtomsCas(EmitterContext context)
        {
            _ = context.GetOp<InstAtomsCas>();

            LogNotImplemented(context, nameof(AtomsCas));
        }

        public static void B2r(EmitterContext context)
        {
            _ = context.GetOp<InstB2r>();

            LogNotImplemented(context, nameof(B2r));
        }

        public static void Bpt(EmitterContext context)
        {
            _ = context.GetOp<InstBpt>();

            LogNotImplemented(context, nameof(Bpt));
        }

        public static void Cctl(EmitterContext context)
        {
            _ = context.GetOp<InstCctl>();

            LogNotImplemented(context, nameof(Cctl));
        }

        public static void Cctll(EmitterContext context)
        {
            _ = context.GetOp<InstCctll>();

            LogNotImplemented(context, nameof(Cctll));
        }

        public static void Cctlt(EmitterContext context)
        {
            _ = context.GetOp<InstCctlt>();

            LogNotImplemented(context, nameof(Cctlt));
        }

        public static void Cs2r(EmitterContext context)
        {
            _ = context.GetOp<InstCs2r>();

            LogNotImplemented(context, nameof(Cs2r));
        }

        public static void FchkR(EmitterContext context)
        {
            _ = context.GetOp<InstFchkR>();

            LogNotImplemented(context, nameof(FchkR));
        }

        public static void FchkI(EmitterContext context)
        {
            _ = context.GetOp<InstFchkI>();

            LogNotImplemented(context, nameof(FchkI));
        }

        public static void FchkC(EmitterContext context)
        {
            _ = context.GetOp<InstFchkC>();

            LogNotImplemented(context, nameof(FchkC));
        }

        public static void Getcrsptr(EmitterContext context)
        {
            _ = context.GetOp<InstGetcrsptr>();

            LogNotImplemented(context, nameof(Getcrsptr));
        }

        public static void Getlmembase(EmitterContext context)
        {
            _ = context.GetOp<InstGetlmembase>();

            LogNotImplemented(context, nameof(Getlmembase));
        }

        public static void Ide(EmitterContext context)
        {
            _ = context.GetOp<InstIde>();

            LogNotImplemented(context, nameof(Ide));
        }

        public static void IdpR(EmitterContext context)
        {
            _ = context.GetOp<InstIdpR>();

            LogNotImplemented(context, nameof(IdpR));
        }

        public static void IdpC(EmitterContext context)
        {
            _ = context.GetOp<InstIdpC>();

            LogNotImplemented(context, nameof(IdpC));
        }

        public static void ImadspR(EmitterContext context)
        {
            _ = context.GetOp<InstImadspR>();

            LogNotImplemented(context, nameof(ImadspR));
        }

        public static void ImadspI(EmitterContext context)
        {
            _ = context.GetOp<InstImadspI>();

            LogNotImplemented(context, nameof(ImadspI));
        }

        public static void ImadspC(EmitterContext context)
        {
            _ = context.GetOp<InstImadspC>();

            LogNotImplemented(context, nameof(ImadspC));
        }

        public static void ImadspRc(EmitterContext context)
        {
            _ = context.GetOp<InstImadspRc>();

            LogNotImplemented(context, nameof(ImadspRc));
        }

        public static void Jcal(EmitterContext context)
        {
            _ = context.GetOp<InstJcal>();

            LogNotImplemented(context, nameof(Jcal));
        }

        public static void Jmp(EmitterContext context)
        {
            _ = context.GetOp<InstJmp>();

            LogNotImplemented(context, nameof(Jmp));
        }

        public static void Jmx(EmitterContext context)
        {
            _ = context.GetOp<InstJmx>();

            LogNotImplemented(context, nameof(Jmx));
        }

        public static void Ld(EmitterContext context)
        {
            _ = context.GetOp<InstLd>();

            LogNotImplemented(context, nameof(Ld));
        }

        public static void Lepc(EmitterContext context)
        {
            _ = context.GetOp<InstLepc>();

            LogNotImplemented(context, nameof(Lepc));
        }

        public static void Longjmp(EmitterContext context)
        {
            _ = context.GetOp<InstLongjmp>();

            LogNotImplemented(context, nameof(Longjmp));
        }

        public static void P2rR(EmitterContext context)
        {
            _ = context.GetOp<InstP2rR>();

            LogNotImplemented(context, nameof(P2rR));
        }

        public static void P2rI(EmitterContext context)
        {
            _ = context.GetOp<InstP2rI>();

            LogNotImplemented(context, nameof(P2rI));
        }

        public static void P2rC(EmitterContext context)
        {
            _ = context.GetOp<InstP2rC>();

            LogNotImplemented(context, nameof(P2rC));
        }

        public static void Pexit(EmitterContext context)
        {
            _ = context.GetOp<InstPexit>();

            LogNotImplemented(context, nameof(Pexit));
        }

        public static void Pixld(EmitterContext context)
        {
            _ = context.GetOp<InstPixld>();

            LogNotImplemented(context, nameof(Pixld));
        }

        public static void Plongjmp(EmitterContext context)
        {
            _ = context.GetOp<InstPlongjmp>();

            LogNotImplemented(context, nameof(Plongjmp));
        }

        public static void Pret(EmitterContext context)
        {
            _ = context.GetOp<InstPret>();

            LogNotImplemented(context, nameof(Pret));
        }

        public static void PrmtR(EmitterContext context)
        {
            _ = context.GetOp<InstPrmtR>();

            LogNotImplemented(context, nameof(PrmtR));
        }

        public static void PrmtI(EmitterContext context)
        {
            _ = context.GetOp<InstPrmtI>();

            LogNotImplemented(context, nameof(PrmtI));
        }

        public static void PrmtC(EmitterContext context)
        {
            _ = context.GetOp<InstPrmtC>();

            LogNotImplemented(context, nameof(PrmtC));
        }

        public static void PrmtRc(EmitterContext context)
        {
            _ = context.GetOp<InstPrmtRc>();

            LogNotImplemented(context, nameof(PrmtRc));
        }

        public static void R2b(EmitterContext context)
        {
            _ = context.GetOp<InstR2b>();

            LogNotImplemented(context, nameof(R2b));
        }

        public static void Ram(EmitterContext context)
        {
            _ = context.GetOp<InstRam>();

            LogNotImplemented(context, nameof(Ram));
        }

        public static void Rtt(EmitterContext context)
        {
            _ = context.GetOp<InstRtt>();

            LogNotImplemented(context, nameof(Rtt));
        }

        public static void Sam(EmitterContext context)
        {
            _ = context.GetOp<InstSam>();

            LogNotImplemented(context, nameof(Sam));
        }

        public static void Setcrsptr(EmitterContext context)
        {
            _ = context.GetOp<InstSetcrsptr>();

            LogNotImplemented(context, nameof(Setcrsptr));
        }

        public static void Setlmembase(EmitterContext context)
        {
            _ = context.GetOp<InstSetlmembase>();

            LogNotImplemented(context, nameof(Setlmembase));
        }

        public static void St(EmitterContext context)
        {
            _ = context.GetOp<InstSt>();

            LogNotImplemented(context, nameof(St));
        }

        public static void Stp(EmitterContext context)
        {
            _ = context.GetOp<InstStp>();

            LogNotImplemented(context, nameof(Stp));
        }

        public static void Txa(EmitterContext context)
        {
            _ = context.GetOp<InstTxa>();

            LogNotImplemented(context, nameof(Txa));
        }

        public static void Vabsdiff(EmitterContext context)
        {
            _ = context.GetOp<InstVabsdiff>();

            LogNotImplemented(context, nameof(Vabsdiff));
        }

        public static void Vabsdiff4(EmitterContext context)
        {
            _ = context.GetOp<InstVabsdiff4>();

            LogNotImplemented(context, nameof(Vabsdiff4));
        }

        public static void Vadd(EmitterContext context)
        {
            _ = context.GetOp<InstVadd>();

            LogNotImplemented(context, nameof(Vadd));
        }

        public static void Votevtg(EmitterContext context)
        {
            _ = context.GetOp<InstVotevtg>();

            LogNotImplemented(context, nameof(Votevtg));
        }

        public static void Vset(EmitterContext context)
        {
            _ = context.GetOp<InstVset>();

            LogNotImplemented(context, nameof(Vset));
        }

        public static void Vshl(EmitterContext context)
        {
            _ = context.GetOp<InstVshl>();

            LogNotImplemented(context, nameof(Vshl));
        }

        public static void Vshr(EmitterContext context)
        {
            _ = context.GetOp<InstVshr>();

            LogNotImplemented(context, nameof(Vshr));
        }

        /// <summary>
        /// Reports through the GPU accessor log that a shader instruction has no implementation.
        /// </summary>
        /// <param name="context">Emitter context whose configuration provides the GPU accessor</param>
        /// <param name="name">Instruction name to place in the message (the emitter method name)</param>
        private static void LogNotImplemented(EmitterContext context, string name)
        {
            context.Config.GpuAccessor.Log($"Shader instruction {name} is not implemented.");
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Shared helpers for the ALU instruction emitters: integer format limits,
    /// predicate combination, sub-word extension, condition flag updates and
    /// 64-bit negate / add-with-carry sequences.
    /// </summary>
    static class InstEmitAluHelper
    {
        /// <summary>
        /// Gets the smallest value representable by an integer destination format.
        /// </summary>
        /// <param name="type">Destination format</param>
        /// <returns>Minimum value of the format, widened to 64 bits</returns>
        /// <exception cref="ArgumentException">The format is not one of U16, S16, U32 or S32</exception>
        public static long GetIntMin(IDstFmt type)
        {
            switch (type)
            {
                case IDstFmt.U16:
                    return ushort.MinValue;
                case IDstFmt.S16:
                    return short.MinValue;
                case IDstFmt.U32:
                    return uint.MinValue;
                case IDstFmt.S32:
                    return int.MinValue;
                default:
                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
            }
        }

        /// <summary>
        /// Gets the largest value representable by an integer destination format.
        /// </summary>
        /// <param name="type">Destination format</param>
        /// <returns>Maximum value of the format, widened to 64 bits</returns>
        /// <exception cref="ArgumentException">The format is not one of U16, S16, U32 or S32</exception>
        public static long GetIntMax(IDstFmt type)
        {
            switch (type)
            {
                case IDstFmt.U16:
                    return ushort.MaxValue;
                case IDstFmt.S16:
                    return short.MaxValue;
                case IDstFmt.U32:
                    return uint.MaxValue;
                case IDstFmt.S32:
                    return int.MaxValue;
                default:
                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
            }
        }

        /// <summary>
        /// Gets the smallest value representable by an integer source/destination format.
        /// </summary>
        /// <param name="type">Source/destination format</param>
        /// <returns>Minimum value of the format, widened to 64 bits</returns>
        /// <exception cref="ArgumentException">The format is not an 8, 16 or 32-bit signed/unsigned format</exception>
        public static long GetIntMin(ISrcDstFmt type)
        {
            switch (type)
            {
                case ISrcDstFmt.U8:
                    return byte.MinValue;
                case ISrcDstFmt.S8:
                    return sbyte.MinValue;
                case ISrcDstFmt.U16:
                    return ushort.MinValue;
                case ISrcDstFmt.S16:
                    return short.MinValue;
                case ISrcDstFmt.U32:
                    return uint.MinValue;
                case ISrcDstFmt.S32:
                    return int.MinValue;
                default:
                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
            }
        }

        /// <summary>
        /// Gets the largest value representable by an integer source/destination format.
        /// </summary>
        /// <param name="type">Source/destination format</param>
        /// <returns>Maximum value of the format, widened to 64 bits</returns>
        /// <exception cref="ArgumentException">The format is not an 8, 16 or 32-bit signed/unsigned format</exception>
        public static long GetIntMax(ISrcDstFmt type)
        {
            switch (type)
            {
                case ISrcDstFmt.U8:
                    return byte.MaxValue;
                case ISrcDstFmt.S8:
                    return sbyte.MaxValue;
                case ISrcDstFmt.U16:
                    return ushort.MaxValue;
                case ISrcDstFmt.S16:
                    return short.MaxValue;
                case ISrcDstFmt.U32:
                    return uint.MaxValue;
                case ISrcDstFmt.S32:
                    return int.MaxValue;
                default:
                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
            }
        }

        /// <summary>
        /// Combines a value with a predicate using the requested boolean operation.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="logicOp">Boolean operation (And, Or or Xor)</param>
        /// <param name="input">Value to combine</param>
        /// <param name="pred">Predicate to combine with</param>
        /// <returns>The combined value, or <paramref name="input"/> unchanged for any other operation</returns>
        public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, Operand input, Operand pred)
        {
            if (logicOp == BoolOp.And)
            {
                return context.BitwiseAnd(input, pred);
            }

            if (logicOp == BoolOp.Or)
            {
                return context.BitwiseOr(input, pred);
            }

            if (logicOp == BoolOp.Xor)
            {
                return context.BitwiseExclusiveOr(input, pred);
            }

            return input;
        }

        /// <summary>
        /// Selects a byte or half-word of a source and extends it to 32 bits.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="src">Source value</param>
        /// <param name="type">Which byte/half to take and whether it is zero or sign extended</param>
        /// <returns>The extended value, or <paramref name="src"/> unchanged for any other selection</returns>
        public static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
        {
            // The source is shifted right (even when the shift is zero) before the low bits are extended.
            Operand Unsigned(int shift, int bits)
            {
                return ZeroExtendTo32(context, context.ShiftRightU32(src, Const(shift)), bits);
            }

            Operand Signed(int shift, int bits)
            {
                return SignExtendTo32(context, context.ShiftRightU32(src, Const(shift)), bits);
            }

            return type switch
            {
                VectorSelect.U8B0 => Unsigned(0, 8),
                VectorSelect.U8B1 => Unsigned(8, 8),
                VectorSelect.U8B2 => Unsigned(16, 8),
                VectorSelect.U8B3 => Unsigned(24, 8),
                VectorSelect.U16H0 => Unsigned(0, 16),
                VectorSelect.U16H1 => Unsigned(16, 16),
                VectorSelect.S8B0 => Signed(0, 8),
                VectorSelect.S8B1 => Signed(8, 8),
                VectorSelect.S8B2 => Signed(16, 8),
                VectorSelect.S8B3 => Signed(24, 8),
                VectorSelect.S16H0 => Signed(0, 16),
                VectorSelect.S16H1 => Signed(16, 16),
                _ => src
            };
        }

        /// <summary>
        /// Updates the zero and negative flags from an integer result.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="dest">Result the flags are derived from</param>
        /// <param name="setCC">When false, nothing is emitted</param>
        /// <param name="extended">When true, the new zero flag is ANDed with the current one</param>
        public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
        {
            if (!setCC)
            {
                return;
            }

            if (!extended)
            {
                context.Copy(GetZF(), context.ICompareEqual(dest, Const(0)));
            }
            else
            {
                // An extended operation is one word of a wider multi-word operation.
                // The zero flag must only stay set if every word so far was zero,
                // so the flag of this word is ANDed with the flag already present.
                Operand previousZF = GetZF();

                Operand combinedZF = context.BitwiseAnd(context.ICompareEqual(dest, Const(0)), previousZF);

                context.Copy(GetZF(), combinedZF);
            }

            context.Copy(GetNF(), context.ICompareLess(dest, Const(0)));
        }

        /// <summary>
        /// Updates the zero and negative flags from a floating-point result.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="dest">Result the flags are derived from</param>
        /// <param name="setCC">When false, nothing is emitted</param>
        /// <param name="fpType">Floating-point type of the result (FP32 or FP64)</param>
        public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC, Instruction fpType = Instruction.FP32)
        {
            if (!setCC)
            {
                return;
            }

            Operand zero = ConstF(0);

            // The comparison constant is converted to FP64 to match an FP64 result.
            if (fpType == Instruction.FP64)
            {
                zero = context.FP32ConvertToFP64(zero);
            }

            context.Copy(GetZF(), context.FPCompareEqual(dest, zero, fpType));
            context.Copy(GetNF(), context.FPCompareLess(dest, zero, fpType));
        }

        /// <summary>
        /// Negates a value split into low and high words, as ~value + 1 with the carry propagated.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="low">Low word</param>
        /// <param name="high">High word</param>
        /// <returns>Low and high words of the negated value</returns>
        public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high)
        {
            Operand invertedLow = context.BitwiseNot(low);
            Operand invertedHigh = context.BitwiseNot(high);

            Operand negatedLow = AddWithCarry(context, invertedLow, Const(1), out Operand carry);
            Operand negatedHigh = context.IAdd(invertedHigh, carry);

            return (negatedLow, negatedHigh);
        }

        /// <summary>
        /// Adds two values and produces the carry out of the addition.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="lhs">Left operand</param>
        /// <param name="rhs">Right operand</param>
        /// <param name="carryOut">Negated result of the unsigned "sum less than lhs" comparison</param>
        /// <returns>The sum</returns>
        public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
        {
            Operand sum = context.IAdd(lhs, rhs);

            // C = Rd < Rn (unsigned): the sum wrapped around if it is below the left operand.
            Operand wrapped = context.ICompareLessUnsigned(sum, lhs);

            carryOut = context.INegate(wrapped);

            return sum;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the attribute instructions (AL2P, ALD, AST, IPA, ISBERD, OUT)
    /// and the fixed-function to user attribute remapping they rely on.
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>
        /// AL2P: copies source A plus the 11-bit immediate into the destination register.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void Al2p(EmitterContext context)
        {
            InstAl2p op = context.GetOp<InstAl2p>();

            context.Copy(GetDest(op.Dest), context.IAdd(GetSrcReg(context, op.SrcA), Const(op.Imm11)));
        }

        /// <summary>
        /// ALD: loads up to <c>AlSize + 1</c> consecutive attribute components into consecutive registers,
        /// stopping at the first destination that is the zero register.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void Ald(EmitterContext context)
        {
            InstAld op = context.GetOp<InstAld>();

            // Some of those attributes are per invocation,
            // so we should ignore any primitive vertex indexing for those.
            bool hasPrimitiveVertex = AttributeMap.HasPrimitiveVertex(context.Config.Stage, op.O) && !op.P;

            if (!op.Phys)
            {
                hasPrimitiveVertex &= HasPrimitiveVertex(op.Imm11);
            }

            Operand primVertex = hasPrimitiveVertex ? context.Copy(GetSrcReg(context, op.SrcB)) : null;

            for (int index = 0; index < (int)op.AlSize + 1; index++)
            {
                Register rd = new Register(op.Dest + index, RegisterType.Gpr);

                if (rd.IsRZ)
                {
                    break;
                }

                if (op.Phys)
                {
                    // Physical addressing: the byte offset in source A is split into a
                    // vector index (offset / 16) and an element index ((offset / 4) & 3).
                    Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
                    Operand vecIndex = context.ShiftRightU32(offset, Const(4));
                    Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));

                    StorageKind storageKind = op.O ? StorageKind.Output : StorageKind.Input;

                    context.Copy(Register(rd), context.Load(storageKind, IoVariable.UserDefined, primVertex, vecIndex, elemIndex));
                }
                else
                {
                    int offset = FixedFuncToUserAttribute(context.Config, op.Imm11 + index * 4, op.O);

                    context.FlagAttributeRead(offset);

                    bool isOutput = op.O && CanLoadOutput(offset);

                    // The instance/vertex id conversion is only attempted for non per-patch
                    // input loads that have no source B register.
                    bool canConvertId = op.SrcB == RegisterConsts.RegisterZeroIndex && !op.P && !isOutput;

                    if (canConvertId && TryConvertIdToIndexForVulkan(context, offset, out Operand value))
                    {
                        context.Copy(Register(rd), value);
                    }
                    else
                    {
                        context.Copy(Register(rd), AttributeMap.GenerateAttributeLoad(context, primVertex, offset, isOutput, op.P));
                    }
                }
            }
        }

        /// <summary>
        /// AST: stores up to <c>AlSize + 1</c> consecutive registers into consecutive attribute components.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void Ast(EmitterContext context)
        {
            InstAst op = context.GetOp<InstAst>();

            for (int index = 0; index < (int)op.AlSize + 1; index++)
            {
                // Stop once the source register index goes past the zero register.
                if (op.SrcB + index > RegisterConsts.RegisterZeroIndex)
                {
                    break;
                }

                Register rd = new Register(op.SrcB + index, RegisterType.Gpr);

                if (op.Phys)
                {
                    Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
                    Operand vecIndex = context.ShiftRightU32(offset, Const(4));
                    Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
                    Operand invocationId = AttributeMap.HasInvocationId(context.Config.Stage, isOutput: true)
                        ? context.Load(StorageKind.Input, IoVariable.InvocationId)
                        : null;

                    context.Store(StorageKind.Output, IoVariable.UserDefined, invocationId, vecIndex, elemIndex, Register(rd));
                }
                else
                {
                    // TODO: Support indirect stores using Ra.

                    int offset = op.Imm11 + index * 4;

                    // NOTE(review): this leaves the whole instruction, so the remaining components
                    // are not stored once an unused one is found — confirm this is intended
                    // rather than skipping only the unused component.
                    if (!context.Config.IsUsedOutputAttribute(offset))
                    {
                        return;
                    }

                    offset = FixedFuncToUserAttribute(context.Config, offset, isOutput: true);

                    context.FlagAttributeWritten(offset);

                    AttributeMap.GenerateAttributeStore(context, offset, op.P, Register(rd));
                }
            }
        }

        /// <summary>
        /// IPA: loads an input attribute, applies the per-attribute adjustments below,
        /// optionally multiplies by source B and saturates, then writes the destination.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void Ipa(EmitterContext context)
        {
            InstIpa op = context.GetOp<InstIpa>();

            context.FlagAttributeRead(op.Imm10);

            Operand res;

            bool isFixedFunc = false;

            if (op.Idx)
            {
                Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
                Operand vecIndex = context.ShiftRightU32(offset, Const(4));
                Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));

                res = context.Load(StorageKind.Input, IoVariable.UserDefined, null, vecIndex, elemIndex);
                res = context.FPMultiply(res, context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3)));
            }
            else
            {
                isFixedFunc = TryFixedFuncToUserAttributeIpa(context, op.Imm10, out res);

                if (op.Imm10 >= AttributeConsts.UserAttributeBase && op.Imm10 < AttributeConsts.UserAttributeEnd)
                {
                    int index = (op.Imm10 - AttributeConsts.UserAttributeBase) >> 4;

                    // Perspective attributes are multiplied by the fourth FragmentCoord component.
                    if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
                    {
                        res = context.FPMultiply(res, context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3)));
                    }
                }
                else if (op.Imm10 == AttributeConsts.PositionX || op.Imm10 == AttributeConsts.PositionY)
                {
                    // FragCoord X/Y must be divided by the render target scale, if resolution scaling is active,
                    // because the shader code is not expecting scaled values.
                    res = context.FPDivide(res, context.Load(StorageKind.Input, IoVariable.SupportBlockRenderScale, null, Const(0)));
                }
                else if (op.Imm10 == AttributeConsts.FrontFacing && context.Config.GpuAccessor.QueryHostHasFrontFacingBug())
                {
                    // gl_FrontFacing sometimes has incorrect (flipped) values depending how it is accessed on Intel GPUs.
                    // This weird trick makes it behave.
                    res = context.ICompareLess(context.INegate(context.IConvertS32ToFP32(res)), Const(0));
                }
            }

            // Fixed-function attributes are never multiplied by source B.
            if (op.IpaOp == IpaOp.Multiply && !isFixedFunc)
            {
                Operand srcB = GetSrcReg(context, op.SrcB);

                res = context.FPMultiply(res, srcB);
            }

            res = context.FPSaturate(res, op.Sat);

            context.Copy(GetDest(op.Dest), res);
        }

        /// <summary>
        /// ISBERD: copies source A to the destination register.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void Isberd(EmitterContext context)
        {
            InstIsberd op = context.GetOp<InstIsberd>();

            // This instruction performs a load from ISBE (Internal Stage Buffer Entry) memory.
            // Here, we just propagate the offset, as the result from this instruction is usually
            // used with ALD to perform vertex load on geometry or tessellation shaders.
            // The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex.
            // Since we hardcode PrimitiveIndex to zero, then the offset will be just VertexIndex.
            context.Copy(GetDest(op.Dest), GetSrcReg(context, op.SrcA));
        }

        /// <summary>
        /// OUT (register form): emits a vertex and/or ends the primitive according to the Emit and Cut flags.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void OutR(EmitterContext context)
        {
            InstOutR op = context.GetOp<InstOutR>();

            EmitOut(context, op.OutType.HasFlag(OutType.Emit), op.OutType.HasFlag(OutType.Cut));
        }

        /// <summary>
        /// OUT (immediate form): emits a vertex and/or ends the primitive according to the Emit and Cut flags.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void OutI(EmitterContext context)
        {
            InstOutI op = context.GetOp<InstOutI>();

            EmitOut(context, op.OutType.HasFlag(OutType.Emit), op.OutType.HasFlag(OutType.Cut));
        }

        /// <summary>
        /// OUT (constant buffer form): emits a vertex and/or ends the primitive according to the Emit and Cut flags.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void OutC(EmitterContext context)
        {
            InstOutC op = context.GetOp<InstOutC>();

            EmitOut(context, op.OutType.HasFlag(OutType.Emit), op.OutType.HasFlag(OutType.Cut));
        }

        /// <summary>
        /// Shared implementation of the OUT forms.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="emit">True to emit a vertex</param>
        /// <param name="cut">True to end the current primitive</param>
        private static void EmitOut(EmitterContext context, bool emit, bool cut)
        {
            // With neither flag set nothing below runs, so this only reports the encoding.
            if (!(emit || cut))
            {
                context.Config.GpuAccessor.Log("Invalid OUT encoding.");
            }

            if (emit)
            {
                if (context.Config.LastInVertexPipeline)
                {
                    context.PrepareForVertexReturn(out var tempXLocal, out var tempYLocal, out var tempZLocal);

                    context.EmitVertex();

                    // Restore output position value before transformation.
                    // NOTE(review): the copy destination is obtained from an Input load of Position —
                    // confirm this writes back the output position as the comment above intends.

                    if (tempXLocal != null)
                    {
                        context.Copy(context.Load(StorageKind.Input, IoVariable.Position, null, Const(0)), tempXLocal);
                    }

                    if (tempYLocal != null)
                    {
                        context.Copy(context.Load(StorageKind.Input, IoVariable.Position, null, Const(1)), tempYLocal);
                    }

                    if (tempZLocal != null)
                    {
                        context.Copy(context.Load(StorageKind.Input, IoVariable.Position, null, Const(2)), tempZLocal);
                    }
                }
                else
                {
                    context.EmitVertex();
                }
            }

            if (cut)
            {
                context.EndPrimitive();
            }
        }

        /// <summary>
        /// Checks whether an attribute may be indexed per primitive vertex.
        /// </summary>
        /// <param name="attr">Attribute offset</param>
        /// <returns>False for PrimitiveId and TessCoord X/Y, true otherwise</returns>
        private static bool HasPrimitiveVertex(int attr)
        {
            return attr != AttributeConsts.PrimitiveId &&
                   attr != AttributeConsts.TessCoordX &&
                   attr != AttributeConsts.TessCoordY;
        }

        /// <summary>
        /// Checks whether an attribute may be loaded from the outputs.
        /// </summary>
        /// <param name="attr">Attribute offset</param>
        /// <returns>False for TessCoord X/Y, true otherwise</returns>
        private static bool CanLoadOutput(int attr)
        {
            return attr != AttributeConsts.TessCoordX && attr != AttributeConsts.TessCoordY;
        }

        /// <summary>
        /// Produces the value of an attribute for IPA, remapping fixed-function attributes.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="attr">Attribute offset</param>
        /// <param name="selectedAttr">Loaded (or constant) attribute value</param>
        /// <returns>True if the attribute is a fixed-function one, false if it was loaded as is</returns>
        private static bool TryFixedFuncToUserAttributeIpa(EmitterContext context, int attr, out Operand selectedAttr)
        {
            if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.BackColorDiffuseR)
            {
                // TODO: If two sided rendering is enabled, then this should return
                // FrontColor if the fragment is front facing, and back color otherwise.
                selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
                return true;
            }
            else if (attr == AttributeConsts.FogCoord)
            {
                // TODO: We likely need to emulate the fixed-function functionality for FogCoord here.
                selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
                return true;
            }
            else if (attr >= AttributeConsts.BackColorDiffuseR && attr < AttributeConsts.ClipDistance0)
            {
                // Back colors are not loaded: the fourth component reads as 1, the others as 0.
                selectedAttr = ConstF(((attr >> 2) & 3) == 3 ? 1f : 0f);
                return true;
            }
            else if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
            {
                selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
                return true;
            }

            selectedAttr = GenerateIpaLoad(context, attr);
            return false;
        }

        /// <summary>
        /// Loads an input attribute with no primitive vertex index and no per-patch addressing.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="offset">Attribute offset</param>
        /// <returns>The loaded value</returns>
        private static Operand GenerateIpaLoad(EmitterContext context, int offset)
        {
            return AttributeMap.GenerateAttributeLoad(context, null, offset, isOutput: false, isPerPatch: false);
        }

        /// <summary>
        /// Remaps a fixed-function attribute (layer, fog coordinate, colors, texture coordinates)
        /// to a user attribute offset. Other attributes are returned unchanged.
        /// </summary>
        /// <param name="config">Shader configuration</param>
        /// <param name="attr">Attribute offset</param>
        /// <param name="isOutput">True if the attribute is an output, false for an input</param>
        /// <returns>The remapped offset, or <paramref name="attr"/> if no remapping applies</returns>
        private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, bool isOutput)
        {
            bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();

            // When the layer has to be passed through a user attribute, index 0 is used for it
            // and the other fixed-function attributes start one index later.
            int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1;

            if (attr == AttributeConsts.Layer && config.Stage != ShaderStage.Geometry && !supportsLayerFromVertexOrTess)
            {
                attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.Layer, 0, isOutput);
                config.SetLayerOutputAttribute(attr);
            }
            else if (attr == AttributeConsts.FogCoord)
            {
                attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.FogCoord, fixedStartAttr, isOutput);
            }
            else if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0)
            {
                attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.FrontColorDiffuseR, fixedStartAttr + 1, isOutput);
            }
            else if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
            {
                attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.TexCoordBase, fixedStartAttr + 5, isOutput);
            }

            return attr;
        }

        /// <summary>
        /// Remaps one fixed-function attribute to a free user attribute and flags that user attribute
        /// as being used for fixed-function emulation.
        /// </summary>
        /// <param name="config">Shader configuration</param>
        /// <param name="attr">Attribute offset</param>
        /// <param name="baseAttr">Offset of the first attribute of the fixed-function group</param>
        /// <param name="baseIndex">First user attribute index to try for the group</param>
        /// <param name="isOutput">True if the attribute is an output, false for an input</param>
        /// <returns>The user attribute offset, or <paramref name="attr"/> unchanged if none is free</returns>
        private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, int baseAttr, int baseIndex, bool isOutput)
        {
            int index = (attr - baseAttr) >> 4;
            int userAttrIndex = config.GetFreeUserAttribute(isOutput, baseIndex + index);

            // The unsigned comparison also rejects negative indices.
            if ((uint)userAttrIndex < Constants.MaxAttributes)
            {
                // Keep the component offset inside the 16-byte vector.
                attr = AttributeConsts.UserAttributeBase + userAttrIndex * 16 + (attr & 0xf);

                if (isOutput)
                {
                    config.SetOutputUserAttributeFixedFunc(userAttrIndex);
                }
                else
                {
                    config.SetInputUserAttributeFixedFunc(userAttrIndex);
                }
            }
            else
            {
                config.GpuAccessor.Log($"Not enough user attributes for fixed attribute offset 0x{attr:X}.");
            }

            return attr;
        }

        /// <summary>
        /// On Vulkan, produces the value for the InstanceId and VertexId attributes from the
        /// instance index / base instance and vertex index inputs.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="attr">Attribute offset</param>
        /// <param name="value">The produced value, or null if no conversion applies</param>
        /// <returns>True if a value was produced, false otherwise</returns>
        private static bool TryConvertIdToIndexForVulkan(EmitterContext context, int attr, out Operand value)
        {
            if (context.Config.Options.TargetApi == TargetApi.Vulkan)
            {
                if (attr == AttributeConsts.InstanceId)
                {
                    value = context.ISubtract(
                        context.Load(StorageKind.Input, IoVariable.InstanceIndex),
                        context.Load(StorageKind.Input, IoVariable.BaseInstance));
                    return true;
                }
                else if (attr == AttributeConsts.VertexId)
                {
                    value = context.Load(StorageKind.Input, IoVariable.VertexIndex);
                    return true;
                }
            }

            value = null;
            return false;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the barrier instructions (BAR, DEPBAR, MEMBAR).
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>
        /// BAR: emits a barrier for the Sync mode; any other mode is only logged.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void Bar(EmitterContext context)
        {
            InstBar op = context.GetOp<InstBar>();

            // TODO: Support other modes.
            if (op.BarOp != BarOp.Sync)
            {
                context.Config.GpuAccessor.Log($"Invalid barrier mode: {op.BarOp}.");

                return;
            }

            context.Barrier();
        }

        /// <summary>
        /// DEPBAR: emits nothing.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void Depbar(EmitterContext context)
        {
            // The operation is decoded, but the instruction is a no operation here.
            _ = context.GetOp<InstDepbar>();
        }

        /// <summary>
        /// MEMBAR: emits a group memory barrier for the Cta level and a memory barrier for any other level.
        /// </summary>
        /// <param name="context">Emitter context</param>
        public static void Membar(EmitterContext context)
        {
            InstMembar op = context.GetOp<InstMembar>();

            // The enum is qualified with its namespace because this method has the same name.
            bool isCtaLevel = op.Membar == Decoders.Membar.Cta;

            if (!isCtaLevel)
            {
                context.MemoryBarrier();

                return;
            }

            context.GroupMemoryBarrier();
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the bitfield instructions: bitfield extract (BFE), bitfield insert (BFI),
    /// find leading one (FLO) and population count (POPC), in their register, immediate
    /// and constant buffer forms.
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>BFE with source B read from a register.</summary>
        public static void BfeR(EmitterContext context)
        {
            InstBfeR op = context.GetOp<InstBfeR>();

            Operand value = GetSrcReg(context, op.SrcA);
            Operand field = GetSrcReg(context, op.SrcB);

            EmitBfe(context, value, field, op.Dest, op.Brev, op.Signed);
        }

        /// <summary>BFE with source B taken from the 20-bit immediate.</summary>
        public static void BfeI(EmitterContext context)
        {
            InstBfeI op = context.GetOp<InstBfeI>();

            Operand value = GetSrcReg(context, op.SrcA);
            Operand field = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitBfe(context, value, field, op.Dest, op.Brev, op.Signed);
        }

        /// <summary>BFE with source B read from a constant buffer.</summary>
        public static void BfeC(EmitterContext context)
        {
            InstBfeC op = context.GetOp<InstBfeC>();

            Operand value = GetSrcReg(context, op.SrcA);
            Operand field = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitBfe(context, value, field, op.Dest, op.Brev, op.Signed);
        }

        /// <summary>BFI with source B read from a register and source C from a register.</summary>
        public static void BfiR(EmitterContext context)
        {
            InstBfiR op = context.GetOp<InstBfiR>();

            Operand insert = GetSrcReg(context, op.SrcA);
            Operand field = GetSrcReg(context, op.SrcB);
            Operand target = GetSrcReg(context, op.SrcC);

            EmitBfi(context, insert, field, target, op.Dest);
        }

        /// <summary>BFI with source B taken from the 20-bit immediate and source C from a register.</summary>
        public static void BfiI(EmitterContext context)
        {
            InstBfiI op = context.GetOp<InstBfiI>();

            Operand insert = GetSrcReg(context, op.SrcA);
            Operand field = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            Operand target = GetSrcReg(context, op.SrcC);

            EmitBfi(context, insert, field, target, op.Dest);
        }

        /// <summary>BFI with source B read from a constant buffer and source C from a register.</summary>
        public static void BfiC(EmitterContext context)
        {
            InstBfiC op = context.GetOp<InstBfiC>();

            Operand insert = GetSrcReg(context, op.SrcA);
            Operand field = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            Operand target = GetSrcReg(context, op.SrcC);

            EmitBfi(context, insert, field, target, op.Dest);
        }

        /// <summary>
        /// BFI where the position/size operand is read from the SrcC register field
        /// and the value inserted into is read from a constant buffer.
        /// </summary>
        public static void BfiRc(EmitterContext context)
        {
            InstBfiRc op = context.GetOp<InstBfiRc>();

            Operand insert = GetSrcReg(context, op.SrcA);
            Operand field = GetSrcReg(context, op.SrcC);
            Operand target = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitBfi(context, insert, field, target, op.Dest);
        }

        /// <summary>FLO with the source read from a register.</summary>
        public static void FloR(EmitterContext context)
        {
            InstFloR op = context.GetOp<InstFloR>();

            Operand source = GetSrcReg(context, op.SrcB);

            EmitFlo(context, source, op.Dest, op.NegB, op.Sh, op.Signed);
        }

        /// <summary>FLO with the source taken from the 20-bit immediate.</summary>
        public static void FloI(EmitterContext context)
        {
            InstFloI op = context.GetOp<InstFloI>();

            Operand source = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitFlo(context, source, op.Dest, op.NegB, op.Sh, op.Signed);
        }

        /// <summary>FLO with the source read from a constant buffer.</summary>
        public static void FloC(EmitterContext context)
        {
            InstFloC op = context.GetOp<InstFloC>();

            Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFlo(context, source, op.Dest, op.NegB, op.Sh, op.Signed);
        }

        /// <summary>POPC with the source read from a register.</summary>
        public static void PopcR(EmitterContext context)
        {
            InstPopcR op = context.GetOp<InstPopcR>();

            Operand source = GetSrcReg(context, op.SrcB);

            EmitPopc(context, source, op.Dest, op.NegB);
        }

        /// <summary>POPC with the source taken from the 20-bit immediate.</summary>
        public static void PopcI(EmitterContext context)
        {
            InstPopcI op = context.GetOp<InstPopcI>();

            Operand source = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitPopc(context, source, op.Dest, op.NegB);
        }

        /// <summary>POPC with the source read from a constant buffer.</summary>
        public static void PopcC(EmitterContext context)
        {
            InstPopcC op = context.GetOp<InstPopcC>();

            Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitPopc(context, source, op.Dest, op.NegB);
        }

        /// <summary>
        /// Extracts a bitfield of source A. The low byte of source B is the bit position
        /// and the next byte is the field size.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="srcA">Value to extract from</param>
        /// <param name="srcB">Packed position and size</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="bitReverse">True to reverse the bits of source A before extracting</param>
        /// <param name="isSigned">True for a signed extract, false for an unsigned one</param>
        private static void EmitBfe(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            bool bitReverse,
            bool isSigned)
        {
            Operand source = bitReverse ? context.BitfieldReverse(srcA) : srcA;

            Operand bitOffset = context.BitwiseAnd(srcB, Const(0xff));
            Operand bitCount = context.BitfieldExtractU32(srcB, Const(8), Const(8));

            Operand extracted;

            if (isSigned)
            {
                extracted = context.BitfieldExtractS32(source, bitOffset, bitCount);
            }
            else
            {
                extracted = context.BitfieldExtractU32(source, bitOffset, bitCount);
            }

            context.Copy(GetDest(rd), extracted);

            // TODO: CC, X, corner cases.
        }

        /// <summary>
        /// Inserts source A into source C. The low byte of source B is the bit position
        /// and the next byte is the field size.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="srcA">Value to insert</param>
        /// <param name="srcB">Packed position and size</param>
        /// <param name="srcC">Value to insert into</param>
        /// <param name="rd">Destination register index</param>
        private static void EmitBfi(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int rd)
        {
            Operand bitOffset = context.BitwiseAnd(srcB, Const(0xff));
            Operand bitCount = context.BitfieldExtractU32(srcB, Const(8), Const(8));

            Operand inserted = context.BitfieldInsert(srcC, srcA, bitOffset, bitCount);

            context.Copy(GetDest(rd), inserted);
        }

        /// <summary>
        /// Finds the leading one of the source: either the least significant set bit of the
        /// bit-reversed source, or the signed/unsigned most significant bit.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="src">Source value</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="invert">Passed to the bitwise not applied to the source</param>
        /// <param name="sh">True to search the bit-reversed source with a find-LSB</param>
        /// <param name="isSigned">When <paramref name="sh"/> is false, selects the signed MSB search</param>
        private static void EmitFlo(EmitterContext context, Operand src, int rd, bool invert, bool sh, bool isSigned)
        {
            Operand value = context.BitwiseNot(src, invert);

            Operand found;

            if (sh)
            {
                found = context.FindLSB(context.BitfieldReverse(value));
            }
            else if (isSigned)
            {
                found = context.FindMSBS32(value);
            }
            else
            {
                found = context.FindMSBU32(value);
            }

            context.Copy(GetDest(rd), found);
        }

        /// <summary>
        /// Counts the set bits of the source.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="src">Source value</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="invert">Passed to the bitwise not applied to the source</param>
        private static void EmitPopc(EmitterContext context, Operand src, int rd, bool invert)
        {
            Operand value = context.BitwiseNot(src, invert);

            context.Copy(GetDest(rd), context.BitCount(value));
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits CSET: tests the condition code selected by the instruction, combines the
        /// outcome with the source predicate and stores it in the destination register.
        /// </summary>
        public static void Cset(EmitterContext context)
        {
            InstCset op = context.GetOp<InstCset>();

            Operand condition = GetCondition(context, op.Ccc);
            Operand predicate = GetPredicate(context, op.SrcPred, op.SrcPredInv);
            Operand combined = GetPredLogicalOp(context, op.Bop, condition, predicate);

            Operand dest = GetDest(op.Dest);

            // When BVal is set the destination receives ConstF(1) or Const(0),
            // otherwise the combined condition value is stored unchanged.
            Operand value = op.BVal
                ? context.ConditionalSelect(combined, ConstF(1), Const(0))
                : combined;

            context.Copy(dest, value);

            // TODO: CC.
        }

        /// <summary>
        /// Emits CSETP: tests the condition code, then writes the test and its bitwise
        /// complement (each combined with the source predicate) to the two destination predicates.
        /// </summary>
        public static void Csetp(EmitterContext context)
        {
            InstCsetp op = context.GetOp<InstCsetp>();

            Operand condition = GetCondition(context, op.Ccc);
            Operand inverse = context.BitwiseNot(condition);
            Operand predicate = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            Operand first = GetPredLogicalOp(context, op.Bop, condition, predicate);
            Operand second = GetPredLogicalOp(context, op.Bop, inverse, predicate);

            context.Copy(Register(op.DestPred, RegisterType.Predicate), first);
            context.Copy(Register(op.DestPredInv, RegisterType.Predicate), second);

            // TODO: CC.
        }

        /// <summary>
        /// Builds the operand that evaluates the condition code test <paramref name="cond"/>
        /// from the N, Z, V and C flag operands.
        /// </summary>
        /// <param name="context">Emitter context used to emit the flag logic</param>
        /// <param name="cond">Condition code test to evaluate</param>
        /// <param name="defaultCond">Constant returned for a test that is not handled here</param>
        /// <returns>Operand holding the result of the test</returns>
        private static Operand GetCondition(EmitterContext context, Ccc cond, int defaultCond = IrConsts.True)
        {
            // Short aliases for the bitwise emitters; argument evaluation order is unchanged.
            Operand Not(Operand value) => context.BitwiseNot(value);
            Operand And(Operand lhs, Operand rhs) => context.BitwiseAnd(lhs, rhs);
            Operand Or(Operand lhs, Operand rhs) => context.BitwiseOr(lhs, rhs);
            Operand Xor(Operand lhs, Operand rhs) => context.BitwiseExclusiveOr(lhs, rhs);

            switch (cond)
            {
                case Ccc.F:
                    return Const(IrConsts.False);
                case Ccc.Lt:
                    return Xor(And(GetNF(), Not(GetZF())), GetVF());
                case Ccc.Eq:
                    return And(Not(GetNF()), GetZF());
                case Ccc.Le:
                    return Xor(GetNF(), Or(GetZF(), GetVF()));
                case Ccc.Gt:
                    return Not(Or(Xor(GetNF(), GetVF()), GetZF()));
                case Ccc.Ne:
                    return Not(GetZF());
                case Ccc.Ge:
                    return Not(Xor(GetNF(), GetVF()));
                case Ccc.Num:
                    return Not(And(GetNF(), GetZF()));
                case Ccc.Nan:
                    return And(GetNF(), GetZF());
                case Ccc.Ltu:
                    return Xor(GetNF(), GetVF());
                case Ccc.Equ:
                    return GetZF();
                case Ccc.Leu:
                    return Or(Xor(GetNF(), GetVF()), GetZF());
                case Ccc.Gtu:
                    return Xor(Not(GetNF()), Or(GetVF(), GetZF()));
                case Ccc.Neu:
                    return Or(GetNF(), Not(GetZF()));
                case Ccc.Geu:
                    return Xor(Or(Not(GetNF()), GetZF()), GetVF());
                case Ccc.T:
                    return Const(IrConsts.True);
                case Ccc.Off:
                    return Not(GetVF());
                case Ccc.Lo:
                    return Not(GetCF());
                case Ccc.Sff:
                    return Not(GetNF());
                case Ccc.Ls:
                    return Or(GetZF(), Not(GetCF()));
                case Ccc.Hi:
                    return And(GetCF(), Not(GetZF()));
                case Ccc.Sft:
                    return GetNF();
                case Ccc.Hs:
                    return GetCF();
                case Ccc.Oft:
                    return GetVF();
                case Ccc.Rle:
                    return Or(GetNF(), GetZF());
                case Ccc.Rgt:
                    return Not(Or(GetNF(), GetZF()));
                default:
                    return Const(defaultCond);
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits F2F (float to float conversion) with a register source.
        /// </summary>
        public static void F2fR(EmitterContext context)
        {
            InstF2fR op = context.GetOp<InstF2fR>();

            var src = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);

            EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
        }

        /// <summary>
        /// Emits F2F (float to float conversion) with an immediate source.
        /// </summary>
        public static void F2fI(EmitterContext context)
        {
            InstF2fI op = context.GetOp<InstF2fI>();

            var src = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));

            EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
        }

        /// <summary>
        /// Emits F2F (float to float conversion) with a constant buffer source.
        /// </summary>
        public static void F2fC(EmitterContext context)
        {
            InstF2fC op = context.GetOp<InstF2fC>();

            var src = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);

            EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
        }

        /// <summary>
        /// Emits F2I (float to integer conversion) with a register source.
        /// </summary>
        public static void F2iR(EmitterContext context)
        {
            InstF2iR op = context.GetOp<InstF2iR>();

            var src = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);

            EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits F2I (float to integer conversion) with an immediate source.
        /// </summary>
        public static void F2iI(EmitterContext context)
        {
            InstF2iI op = context.GetOp<InstF2iI>();

            var src = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));

            EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits F2I (float to integer conversion) with a constant buffer source.
        /// </summary>
        public static void F2iC(EmitterContext context)
        {
            InstF2iC op = context.GetOp<InstF2iC>();

            var src = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);

            EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits I2F (integer to float conversion) with a register source.
        /// </summary>
        public static void I2fR(EmitterContext context)
        {
            InstI2fR op = context.GetOp<InstI2fR>();

            var src = GetSrcReg(context, op.SrcB);

            EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits I2F (integer to float conversion) with an immediate source.
        /// </summary>
        public static void I2fI(EmitterContext context)
        {
            InstI2fI op = context.GetOp<InstI2fI>();

            var src = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits I2F (integer to float conversion) with a constant buffer source.
        /// </summary>
        public static void I2fC(EmitterContext context)
        {
            InstI2fC op = context.GetOp<InstI2fC>();

            var src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits I2I (integer to integer conversion) with a register source.
        /// </summary>
        public static void I2iR(EmitterContext context)
        {
            InstI2iR op = context.GetOp<InstI2iR>();

            var src = GetSrcReg(context, op.SrcB);

            EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
        }

        /// <summary>
        /// Emits I2I (integer to integer conversion) with an immediate source.
        /// </summary>
        public static void I2iI(EmitterContext context)
        {
            InstI2iI op = context.GetOp<InstI2iI>();

            var src = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
        }

        /// <summary>
        /// Emits I2I (integer to integer conversion) with a constant buffer source.
        /// </summary>
        public static void I2iC(EmitterContext context)
        {
            InstI2iC op = context.GetOp<InstI2iC>();

            var src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
        }

        /// <summary>
        /// Shared F2F implementation: applies abs/neg, optionally rounds to an integral value,
        /// converts between FP32 and FP64 when required, saturates and writes the destination.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="srcType">Floating point format of the source</param>
        /// <param name="dstType">Floating point format of the destination</param>
        /// <param name="roundingMode">Rounding applied when source and destination formats match</param>
        /// <param name="src">Already unpacked source operand</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="absolute">Take the absolute value of the source</param>
        /// <param name="negate">Negate the source</param>
        /// <param name="saturate">Saturate the result</param>
        private static void EmitF2F(
            EmitterContext context,
            DstFmt srcType,
            DstFmt dstType,
            IntegerRound roundingMode,
            Operand src,
            int rd,
            bool absolute,
            bool negate,
            bool saturate)
        {
            Operand srcB = context.FPAbsNeg(src, absolute, negate, srcType.ToInstFPType());

            // Rounding is only performed when no format change is requested.
            if (srcType == dstType)
            {
                srcB = roundingMode switch
                {
                    IntegerRound.Round => context.FPRound(srcB, srcType.ToInstFPType()),
                    IntegerRound.Floor => context.FPFloor(srcB, srcType.ToInstFPType()),
                    IntegerRound.Ceil => context.FPCeiling(srcB, srcType.ToInstFPType()),
                    IntegerRound.Trunc => context.FPTruncate(srcB, srcType.ToInstFPType()),
                    _ => srcB
                };
            }

            // We don't need to handle conversions between FP16 <-> FP32
            // since we do FP16 operations as FP32 directly.
            // FP16 <-> FP64 conversions are invalid.
            if (srcType == DstFmt.F32 && dstType == DstFmt.F64)
            {
                srcB = context.FP32ConvertToFP64(srcB);
            }
            else if (srcType == DstFmt.F64 && dstType == DstFmt.F32)
            {
                srcB = context.FP64ConvertToFP32(srcB);
            }

            srcB = context.FPSaturate(srcB, saturate, dstType.ToInstFPType());

            WriteFP(context, dstType, srcB, rd);

            // TODO: CC.
        }

        /// <summary>
        /// Shared F2I implementation: applies abs/neg and rounding, converts the value to a
        /// 32-bit signed or unsigned integer, clamps 16-bit destinations and writes the result.
        /// </summary>
        /// <remarks>
        /// Only 32-bit conversions are emitted. 64-bit destination formats (signed or unsigned)
        /// are reported through the GPU accessor log and still go through the 32-bit path.
        /// </remarks>
        /// <param name="context">Emitter context</param>
        /// <param name="srcType">Floating point format of the source</param>
        /// <param name="dstType">Integer format of the destination</param>
        /// <param name="roundingMode">Rounding applied before the conversion</param>
        /// <param name="src">Already unpacked source operand</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="absolute">Take the absolute value of the source</param>
        /// <param name="negate">Negate the source</param>
        private static void EmitF2I(
            EmitterContext context,
            DstFmt srcType,
            IDstFmt dstType,
            RoundMode2 roundingMode,
            Operand src,
            int rd,
            bool absolute,
            bool negate)
        {
            // S64 takes exactly the same 32-bit conversion path as U64 below,
            // so both must be reported as unimplemented.
            if (dstType == IDstFmt.U64 || dstType == IDstFmt.S64)
            {
                context.Config.GpuAccessor.Log("Unimplemented 64-bits F2I.");
            }

            Instruction fpType = srcType.ToInstFPType();

            bool isSignedInt = dstType == IDstFmt.S16 || dstType == IDstFmt.S32 || dstType == IDstFmt.S64;
            bool isSmallInt = dstType == IDstFmt.U16 || dstType == IDstFmt.S16;

            Operand srcB = context.FPAbsNeg(src, absolute, negate, fpType);

            srcB = roundingMode switch
            {
                RoundMode2.Round => context.FPRound(srcB, fpType),
                RoundMode2.Floor => context.FPFloor(srcB, fpType),
                RoundMode2.Ceil => context.FPCeiling(srcB, fpType),
                RoundMode2.Trunc => context.FPTruncate(srcB, fpType),
                _ => srcB
            };

            if (!isSignedInt)
            {
                // Negative float to uint cast is undefined, so we clamp the value before conversion.
                Operand c0 = srcType == DstFmt.F64 ? context.PackDouble2x32(0.0) : ConstF(0);

                srcB = context.FPMaximum(srcB, c0, fpType);
            }

            if (srcType == DstFmt.F64)
            {
                srcB = isSignedInt
                    ? context.FP64ConvertToS32(srcB)
                    : context.FP64ConvertToU32(srcB);
            }
            else
            {
                srcB = isSignedInt
                    ? context.FP32ConvertToS32(srcB)
                    : context.FP32ConvertToU32(srcB);
            }

            // 16-bit destinations are clamped to the range of the destination format.
            if (isSmallInt)
            {
                int min = (int)GetIntMin(dstType);
                int max = (int)GetIntMax(dstType);

                srcB = isSignedInt
                    ? context.IClampS32(srcB, Const(min), Const(max))
                    : context.IClampU32(srcB, Const(min), Const(max));
            }

            Operand dest = GetDest(rd);

            context.Copy(dest, srcB);

            // TODO: CC.
        }

        /// <summary>
        /// Shared I2F implementation: applies abs/neg, extracts the selected 8 or 16-bit field
        /// for small source formats, converts to FP32 or FP64 and writes the destination.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="srcType">Integer format of the source</param>
        /// <param name="dstType">Floating point format of the destination</param>
        /// <param name="src">Source operand</param>
        /// <param name="byteSelection">Byte offset (multiplied by 8 to obtain the bit offset) of the field to extract</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="absolute">Take the absolute value of the source</param>
        /// <param name="negate">Negate the source</param>
        private static void EmitI2F(
            EmitterContext context,
            ISrcFmt srcType,
            DstFmt dstType,
            Operand src,
            ByteSel byteSelection,
            int rd,
            bool absolute,
            bool negate)
        {
            bool isSignedInt =
                srcType == ISrcFmt.S8 ||
                srcType == ISrcFmt.S16 ||
                srcType == ISrcFmt.S32 ||
                srcType == ISrcFmt.S64;
            bool isSmallInt =
                srcType == ISrcFmt.U16 ||
                srcType == ISrcFmt.S16 ||
                srcType == ISrcFmt.U8 ||
                srcType == ISrcFmt.S8;

            // TODO: Handle S/U64.

            Operand srcB = context.IAbsNeg(src, absolute, negate);

            if (isSmallInt)
            {
                int size = srcType == ISrcFmt.U16 || srcType == ISrcFmt.S16 ? 16 : 8;

                srcB = isSignedInt
                    ? context.BitfieldExtractS32(srcB, Const((int)byteSelection * 8), Const(size))
                    : context.BitfieldExtractU32(srcB, Const((int)byteSelection * 8), Const(size));
            }

            if (dstType == DstFmt.F64)
            {
                srcB = isSignedInt
                    ? context.IConvertS32ToFP64(srcB)
                    : context.IConvertU32ToFP64(srcB);
            }
            else
            {
                srcB = isSignedInt
                    ? context.IConvertS32ToFP32(srcB)
                    : context.IConvertU32ToFP32(srcB);
            }

            WriteFP(context, dstType, srcB, rd);

            // TODO: CC.
        }

        /// <summary>
        /// Shared I2I implementation: extracts the selected field for small source formats,
        /// applies abs/neg, optionally clamps to the destination range, writes the destination
        /// and updates the Z/N flags when requested.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="srcType">Integer format of the source</param>
        /// <param name="dstType">Integer format of the destination</param>
        /// <param name="src">Source operand</param>
        /// <param name="byteSelection">Byte offset (multiplied by 8 to obtain the bit offset) of the field to extract</param>
        /// <param name="rd">Destination register index</param>
        /// <param name="absolute">Take the absolute value of the source</param>
        /// <param name="negate">Negate the source</param>
        /// <param name="saturate">Clamp the result to the range of the destination format</param>
        /// <param name="writeCC">Passed to SetZnFlags to control the flag update</param>
        private static void EmitI2I(
            EmitterContext context,
            ISrcDstFmt srcType,
            ISrcDstFmt dstType,
            Operand src,
            ByteSel byteSelection,
            int rd,
            bool absolute,
            bool negate,
            bool saturate,
            bool writeCC)
        {
            // Formats beyond U32/S32 are rejected: nothing is emitted for them.
            if ((srcType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32 || (dstType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32)
            {
                context.Config.GpuAccessor.Log("Invalid I2I encoding.");
                return;
            }

            bool srcIsSignedInt =
                srcType == ISrcDstFmt.S8 ||
                srcType == ISrcDstFmt.S16 ||
                srcType == ISrcDstFmt.S32;
            bool dstIsSignedInt =
                dstType == ISrcDstFmt.S8 ||
                dstType == ISrcDstFmt.S16 ||
                dstType == ISrcDstFmt.S32;
            bool srcIsSmallInt =
                srcType == ISrcDstFmt.U16 ||
                srcType == ISrcDstFmt.S16 ||
                srcType == ISrcDstFmt.U8 ||
                srcType == ISrcDstFmt.S8;

            if (srcIsSmallInt)
            {
                int size = srcType == ISrcDstFmt.U16 || srcType == ISrcDstFmt.S16 ? 16 : 8;

                src = srcIsSignedInt
                    ? context.BitfieldExtractS32(src, Const((int)byteSelection * 8), Const(size))
                    : context.BitfieldExtractU32(src, Const((int)byteSelection * 8), Const(size));
            }

            src = context.IAbsNeg(src, absolute, negate);

            if (saturate)
            {
                int min = (int)GetIntMin(dstType);
                int max = (int)GetIntMax(dstType);

                src = dstIsSignedInt
                    ? context.IClampS32(src, Const(min), Const(max))
                    : context.IClampU32(src, Const(min), Const(max));
            }

            context.Copy(GetDest(rd), src);

            SetZnFlags(context, src, writeCC);
        }

        /// <summary>
        /// Reads a floating point source of the given format from a register.
        /// For F16, <paramref name="h"/> selects element 1 (true) or 0 (false) of the unpacked pair.
        /// </summary>
        /// <exception cref="ArgumentException">The format is not F16, F32 or F64</exception>
        private static Operand UnpackReg(EmitterContext context, DstFmt floatType, bool h, int reg)
        {
            if (floatType == DstFmt.F32)
            {
                return GetSrcReg(context, reg);
            }
            else if (floatType == DstFmt.F16)
            {
                return GetHalfUnpacked(context, GetSrcReg(context, reg), HalfSwizzle.F16)[h ? 1 : 0];
            }
            else if (floatType == DstFmt.F64)
            {
                return GetSrcReg(context, reg, isFP64: true);
            }

            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
        }

        /// <summary>
        /// Reads a floating point source of the given format from a constant buffer.
        /// For F16, <paramref name="h"/> selects element 1 (true) or 0 (false) of the unpacked pair.
        /// </summary>
        /// <exception cref="ArgumentException">The format is not F16, F32 or F64</exception>
        private static Operand UnpackCbuf(EmitterContext context, DstFmt floatType, bool h, int cbufSlot, int cbufOffset)
        {
            if (floatType == DstFmt.F32)
            {
                return GetSrcCbuf(context, cbufSlot, cbufOffset);
            }
            else if (floatType == DstFmt.F16)
            {
                return GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), HalfSwizzle.F16)[h ? 1 : 0];
            }
            else if (floatType == DstFmt.F64)
            {
                return GetSrcCbuf(context, cbufSlot, cbufOffset, isFP64: true);
            }

            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
        }

        /// <summary>
        /// Builds a floating point source of the given format from an immediate value.
        /// For F16, <paramref name="h"/> selects element 1 (true) or 0 (false) of the unpacked pair.
        /// </summary>
        /// <exception cref="ArgumentException">The format is not F16, F32 or F64</exception>
        private static Operand UnpackImm(EmitterContext context, DstFmt floatType, bool h, int imm)
        {
            if (floatType == DstFmt.F32)
            {
                return GetSrcImm(context, imm);
            }
            else if (floatType == DstFmt.F16)
            {
                return GetHalfUnpacked(context, GetSrcImm(context, imm), HalfSwizzle.F16)[h ? 1 : 0];
            }
            else if (floatType == DstFmt.F64)
            {
                return GetSrcImm(context, imm, isFP64: true);
            }

            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
        }

        /// <summary>
        /// Writes a floating point result to the destination register(s): F32 is copied directly,
        /// F16 is packed together with a zero second element, and any other format is written as
        /// the low and high 32-bit halves of a double to two destination registers.
        /// </summary>
        private static void WriteFP(EmitterContext context, DstFmt type, Operand srcB, int rd)
        {
            Operand dest = GetDest(rd);

            if (type == DstFmt.F32)
            {
                context.Copy(dest, srcB);
            }
            else if (type == DstFmt.F16)
            {
                context.Copy(dest, context.PackHalf2x16(srcB, ConstF(0)));
            }
            else /* if (type == DstFmt.F64) */
            {
                Operand dest2 = GetDest2(rd);

                context.Copy(dest, context.UnpackDouble2x32Low(srcB));
                context.Copy(dest2, context.UnpackDouble2x32High(srcB));
            }
        }

        /// <summary>
        /// Maps a floating point format to the IR precision flag: FP64 for F64, FP32 for everything else.
        /// </summary>
        private static Instruction ToInstFPType(this DstFmt type)
        {
            return type == DstFmt.F64 ? Instruction.FP64 : Instruction.FP32;
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        // FP64 add, register operand B.
        public static void DaddR(EmitterContext context)
        {
            InstDaddR op = context.GetOp<InstDaddR>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcReg(context, op.SrcB, isFP64: true);

            EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
        }

        // FP64 add, immediate operand B.
        public static void DaddI(EmitterContext context)
        {
            InstDaddI op = context.GetOp<InstDaddI>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);

            EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
        }

        // FP64 add, constant buffer operand B.
        public static void DaddC(EmitterContext context)
        {
            InstDaddC op = context.GetOp<InstDaddC>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

            EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
        }

        // FP64 fused multiply-add, register operands B and C.
        public static void DfmaR(EmitterContext context)
        {
            InstDfmaR op = context.GetOp<InstDfmaR>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcReg(context, op.SrcB, isFP64: true);
            Operand c = GetSrcReg(context, op.SrcC, isFP64: true);

            EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
        }

        // FP64 fused multiply-add, immediate operand B.
        public static void DfmaI(EmitterContext context)
        {
            InstDfmaI op = context.GetOp<InstDfmaI>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
            Operand c = GetSrcReg(context, op.SrcC, isFP64: true);

            EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
        }

        // FP64 fused multiply-add, constant buffer operand B.
        public static void DfmaC(EmitterContext context)
        {
            InstDfmaC op = context.GetOp<InstDfmaC>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
            Operand c = GetSrcReg(context, op.SrcC, isFP64: true);

            EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
        }

        // FP64 fused multiply-add, operand B from the SrcC register and operand C from a constant buffer.
        public static void DfmaRc(EmitterContext context)
        {
            InstDfmaRc op = context.GetOp<InstDfmaRc>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcReg(context, op.SrcC, isFP64: true);
            Operand c = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

            EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
        }

        // FP64 multiply, register operand B.
        public static void DmulR(EmitterContext context)
        {
            InstDmulR op = context.GetOp<InstDmulR>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcReg(context, op.SrcB, isFP64: true);

            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
        }

        // FP64 multiply, immediate operand B.
        public static void DmulI(EmitterContext context)
        {
            InstDmulI op = context.GetOp<InstDmulI>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);

            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
        }

        // FP64 multiply, constant buffer operand B.
        public static void DmulC(EmitterContext context)
        {
            InstDmulC op = context.GetOp<InstDmulC>();

            Operand a = GetSrcReg(context, op.SrcA, isFP64: true);
            Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
        }

        // FP32 add, register operand B.
        public static void FaddR(EmitterContext context)
        {
            InstFaddR op = context.GetOp<InstFaddR>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcReg(context, op.SrcB);

            EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
        }

        // FP32 add, 20-bit immediate operand B.
        public static void FaddI(EmitterContext context)
        {
            InstFaddI op = context.GetOp<InstFaddI>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
        }

        // FP32 add, constant buffer operand B.
        public static void FaddC(EmitterContext context)
        {
            InstFaddC op = context.GetOp<InstFaddC>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
        }

        // FP32 add, 32-bit immediate operand B; saturation is never requested for this form.
        public static void Fadd32i(EmitterContext context)
        {
            InstFadd32i op = context.GetOp<InstFadd32i>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcImm(context, op.Imm32);

            EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
        }

        // FP32 fused multiply-add, register operands B and C.
        public static void FfmaR(EmitterContext context)
        {
            InstFfmaR op = context.GetOp<InstFfmaR>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcReg(context, op.SrcB);
            Operand c = GetSrcReg(context, op.SrcC);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 fused multiply-add, 20-bit immediate operand B.
        public static void FfmaI(EmitterContext context)
        {
            InstFfmaI op = context.GetOp<InstFfmaI>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20));
            Operand c = GetSrcReg(context, op.SrcC);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 fused multiply-add, constant buffer operand B.
        public static void FfmaC(EmitterContext context)
        {
            InstFfmaC op = context.GetOp<InstFfmaC>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            Operand c = GetSrcReg(context, op.SrcC);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 fused multiply-add, operand B from the SrcC register and operand C from a constant buffer.
        public static void FfmaRc(EmitterContext context)
        {
            InstFfmaRc op = context.GetOp<InstFfmaRc>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcReg(context, op.SrcC);
            Operand c = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 fused multiply-add, 32-bit immediate operand B; the destination register is also read as operand C.
        public static void Ffma32i(EmitterContext context)
        {
            InstFfma32i op = context.GetOp<InstFfma32i>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcImm(context, op.Imm32);
            Operand c = GetSrcReg(context, op.Dest);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 multiply, register operand B.
        public static void FmulR(EmitterContext context)
        {
            InstFmulR op = context.GetOp<InstFmulR>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcReg(context, op.SrcB);

            EmitFmul(context, Instruction.FP32, op.Scale, a, b, op.Dest, op.NegA, op.Sat, op.WriteCC);
        }

        // FP32 multiply, 20-bit immediate operand B.
        public static void FmulI(EmitterContext context)
        {
            InstFmulI op = context.GetOp<InstFmulI>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitFmul(context, Instruction.FP32, op.Scale, a, b, op.Dest, op.NegA, op.Sat, op.WriteCC);
        }

        // FP32 multiply, constant buffer operand B.
        public static void FmulC(EmitterContext context)
        {
            InstFmulC op = context.GetOp<InstFmulC>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFmul(context, Instruction.FP32, op.Scale, a, b, op.Dest, op.NegA, op.Sat, op.WriteCC);
        }

        // FP32 multiply, 32-bit immediate operand B; no scale and no negation for this form.
        public static void Fmul32i(EmitterContext context)
        {
            InstFmul32i op = context.GetOp<InstFmul32i>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcImm(context, op.Imm32);

            EmitFmul(context, Instruction.FP32, MultiplyScale.NoScale, a, b, op.Dest, false, op.Sat, op.WriteCC);
        }

        // Half pair add, register operand B.
        public static void Hadd2R(EmitterContext context)
        {
            InstHadd2R op = context.GetOp<InstHadd2R>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            Operand[] b = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: true, op.Dest, op.Sat);
        }

        // Half pair add, operand B from the two half immediates.
        public static void Hadd2I(EmitterContext context)
        {
            InstHadd2I op = context.GetOp<InstHadd2I>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            Operand[] b = GetHalfSrc(context, op.BimmH0, op.BimmH1);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: true, op.Dest, op.Sat);
        }

        // Half pair add, constant buffer operand B.
        public static void Hadd2C(EmitterContext context)
        {
            InstHadd2C op = context.GetOp<InstHadd2C>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            Operand[] b = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, op.AbsB);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: true, op.Dest, op.Sat);
        }

        // Half pair add, 32-bit immediate operand B; output format is always F16.
        public static void Hadd232i(EmitterContext context)
        {
            InstHadd232i op = context.GetOp<InstHadd232i>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, false);
            Operand[] b = GetHalfSrc(context, op.Imm);

            EmitHadd2Hmul2(context, OFmt.F16, a, b, isAdd: true, op.Dest, op.Sat);
        }

        // Half pair fused multiply-add, register operands B and C.
        public static void Hfma2R(EmitterContext context)
        {
            InstHfma2R op = context.GetOp<InstHfma2R>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            Operand[] b = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegA, false);
            Operand[] c = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);

            EmitHfma2(context, op.OFmt, a, b, c, op.Dest, op.Sat);
        }

        // Half pair fused multiply-add, operand B from the two half immediates.
        public static void Hfma2I(EmitterContext context)
        {
            InstHfma2I op = context.GetOp<InstHfma2I>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            Operand[] b = GetHalfSrc(context, op.BimmH0, op.BimmH1);
            Operand[] c = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);

            EmitHfma2(context, op.OFmt, a, b, c, op.Dest, op.Sat);
        }

        // Half pair fused multiply-add, constant buffer operand B.
        public static void Hfma2C(EmitterContext context)
        {
            InstHfma2C op = context.GetOp<InstHfma2C>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            Operand[] b = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegA, false);
            Operand[] c = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);

            EmitHfma2(context, op.OFmt, a, b, c, op.Dest, op.Sat);
        }

        // Half pair fused multiply-add, operand B from the SrcC register and operand C from a constant buffer.
        public static void Hfma2Rc(EmitterContext context)
        {
            InstHfma2Rc op = context.GetOp<InstHfma2Rc>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            Operand[] b = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegA, false);
            Operand[] c = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegC, false);

            EmitHfma2(context, op.OFmt, a, b, c, op.Dest, op.Sat);
        }

        // Half pair fused multiply-add, 32-bit immediate operand B; the destination register is also
        // read as operand C, the output format is always F16 and saturation is never requested.
        public static void Hfma232i(EmitterContext context)
        {
            InstHfma232i op = context.GetOp<InstHfma232i>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            Operand[] b = GetHalfSrc(context, op.Imm);
            Operand[] c = GetHalfSrc(context, HalfSwizzle.F16, op.Dest, op.NegC, false);

            EmitHfma2(context, OFmt.F16, a, b, c, op.Dest, saturate: false);
        }

        // Half pair multiply, register operand B.
        public static void Hmul2R(EmitterContext context)
        {
            InstHmul2R op = context.GetOp<InstHmul2R>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, op.AbsA);
            Operand[] b = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegA, op.AbsB);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: false, op.Dest, op.Sat);
        }

        // Half pair multiply, operand B from the two half immediates.
        public static void Hmul2I(EmitterContext context)
        {
            InstHmul2I op = context.GetOp<InstHmul2I>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            Operand[] b = GetHalfSrc(context, op.BimmH0, op.BimmH1);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: false, op.Dest, op.Sat);
        }

        // Half pair multiply, constant buffer operand B.
        public static void Hmul2C(EmitterContext context)
        {
            InstHmul2C op = context.GetOp<InstHmul2C>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, op.AbsA);
            Operand[] b = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegA, op.AbsB);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: false, op.Dest, op.Sat);
        }

        // Half pair multiply, 32-bit immediate operand B; output format is always F16.
        public static void Hmul232i(EmitterContext context)
        {
            InstHmul232i op = context.GetOp<InstHmul232i>();

            Operand[] a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            Operand[] b = GetHalfSrc(context, op.Imm32);

            EmitHadd2Hmul2(context, OFmt.F16, a, b, isAdd: false, op.Dest, op.Sat);
        }

        // Shared add implementation: abs/neg on both inputs, add, optional saturate,
        // then destination write and FP Z/N flag update.
        private static void EmitFadd(
            EmitterContext context,
            Instruction fpType,
            Operand srcA,
            Operand srcB,
            int rd,
            bool negateA,
            bool negateB,
            bool absoluteA,
            bool absoluteB,
            bool saturate,
            bool writeCC)
        {
            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

            Operand sum = context.FPAdd(lhs, rhs, fpType);
            Operand result = context.FPSaturate(sum, saturate, fpType);

            SetDest(context, result, rd, fpType == Instruction.FP64);

            SetFPZnFlags(context, result, writeCC, fpType);
        }

        // Shared fused multiply-add implementation: optional negation of B and C, FMA,
        // optional saturate, then destination write and FP Z/N flag update.
        private static void EmitFfma(
            EmitterContext context,
            Instruction fpType,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool negateB,
            bool negateC,
            bool saturate,
            bool writeCC)
        {
            Operand factor = context.FPNegate(srcB, negateB, fpType);
            Operand addend = context.FPNegate(srcC, negateC, fpType);

            Operand fused = context.FPFusedMultiplyAdd(srcA, factor, addend, fpType);
            Operand result = context.FPSaturate(fused, saturate, fpType);

            SetDest(context, result, rd, fpType == Instruction.FP64);

            SetFPZnFlags(context, result, writeCC, fpType);
        }

        // Shared multiply implementation: optional negation of B, optional pre-scaling of A,
        // multiply, optional saturate, then destination write and FP Z/N flag update.
        private static void EmitFmul(
            EmitterContext context,
            Instruction fpType,
            MultiplyScale scale,
            Operand srcA,
            Operand srcB,
            int rd,
            bool negateB,
            bool saturate,
            bool writeCC)
        {
            bool isFP64 = fpType == Instruction.FP64;

            Operand rhs = context.FPNegate(srcB, negateB, fpType);
            Operand lhs = srcA;

            if (scale != MultiplyScale.NoScale)
            {
                Operand scaleConst;

                switch (scale)
                {
                    case MultiplyScale.D2:
                        scaleConst = ConstF(0.5f);
                        break;
                    case MultiplyScale.D4:
                        scaleConst = ConstF(0.25f);
                        break;
                    case MultiplyScale.D8:
                        scaleConst = ConstF(0.125f);
                        break;
                    case MultiplyScale.M2:
                        scaleConst = ConstF(2f);
                        break;
                    case MultiplyScale.M4:
                        scaleConst = ConstF(4f);
                        break;
                    case MultiplyScale.M8:
                        scaleConst = ConstF(8f);
                        break;
                    default:
                        // Invalid, behave as if it had no scale.
                        scaleConst = ConstF(1f);
                        break;
                }

                // A factor of one can only come from the fallback above.
                if (scaleConst.AsFloat() == 1f)
                {
                    context.Config.GpuAccessor.Log($"Invalid FP multiply scale \"{scale}\".");
                }

                if (isFP64)
                {
                    scaleConst = context.FP32ConvertToFP64(scaleConst);
                }

                lhs = context.FPMultiply(lhs, scaleConst, fpType);
            }

            Operand product = context.FPMultiply(lhs, rhs, fpType);
            Operand result = context.FPSaturate(product, saturate, fpType);

            SetDest(context, result, rd, isFP64);

            SetFPZnFlags(context, result, writeCC, fpType);
        }

        // Shared half pair add/multiply implementation: processes both elements,
        // then packs them according to the output format and writes the destination.
        private static void EmitHadd2Hmul2(
            EmitterContext context,
            OFmt swizzle,
            Operand[] srcA,
            Operand[] srcB,
            bool isAdd,
            int rd,
            bool saturate)
        {
            Operand[] results = new Operand[2];

            for (int i = 0; i < results.Length; i++)
            {
                Operand value = isAdd
                    ? context.FPAdd(srcA[i], srcB[i])
                    : context.FPMultiply(srcA[i], srcB[i]);

                results[i] = context.FPSaturate(value, saturate);
            }

            context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, results, rd));
        }

        // Shared half pair fused multiply-add implementation: processes both elements,
        // then packs them according to the output format and writes the destination.
        public static void EmitHfma2(
            EmitterContext context,
            OFmt swizzle,
            Operand[] srcA,
            Operand[] srcB,
            Operand[] srcC,
            int rd,
            bool saturate)
        {
            Operand[] results = new Operand[2];

            for (int i = 0; i < results.Length; i++)
            {
                Operand fused = context.FPFusedMultiplyAdd(srcA[i], srcB[i], srcC[i]);

                results[i] = context.FPSaturate(fused, saturate);
            }

            context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, results, rd));
        }
    }
}
// Source file: src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the floating-point comparison instruction families handled in this file
    /// (Dset, Dsetp, Fcmp, Fset, Fsetp, Hset2 and Hsetp2). Each public entry point decodes the
    /// current op, builds its source operands and forwards to one of the private Emit* helpers.
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>Dset with a register second operand; both sources are read as FP64.</summary>
        public static void DsetR(EmitterContext context)
        {
            InstDsetR inst = context.GetOp<InstDsetR>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

            EmitFset(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.BVal, inst.WriteCC,
                isFP64: true);
        }

        /// <summary>Dset with a 20-bit immediate second operand; both sources are read as FP64.</summary>
        public static void DsetI(EmitterContext context)
        {
            InstDsetI inst = context.GetOp<InstDsetI>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

            EmitFset(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.BVal, inst.WriteCC,
                isFP64: true);
        }

        /// <summary>Dset with a constant-buffer second operand; both sources are read as FP64.</summary>
        public static void DsetC(EmitterContext context)
        {
            InstDsetC inst = context.GetOp<InstDsetC>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

            EmitFset(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.BVal, inst.WriteCC,
                isFP64: true);
        }

        /// <summary>Dsetp with a register second operand; always passes writeCC = false.</summary>
        public static void DsetpR(EmitterContext context)
        {
            InstDsetpR inst = context.GetOp<InstDsetpR>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);

            EmitFsetp(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                writeCC: false,
                isFP64: true);
        }

        /// <summary>Dsetp with a 20-bit immediate second operand; always passes writeCC = false.</summary>
        public static void DsetpI(EmitterContext context)
        {
            InstDsetpI inst = context.GetOp<InstDsetpI>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);

            EmitFsetp(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                writeCC: false,
                isFP64: true);
        }

        /// <summary>Dsetp with a constant-buffer second operand; always passes writeCC = false.</summary>
        public static void DsetpC(EmitterContext context)
        {
            InstDsetpC inst = context.GetOp<InstDsetpC>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);

            EmitFsetp(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                writeCC: false,
                isFP64: true);
        }

        /// <summary>Fcmp where the second operand comes from a register.</summary>
        public static void FcmpR(EmitterContext context)
        {
            InstFcmpR inst = context.GetOp<InstFcmpR>();

            Operand first = GetSrcReg(context, inst.SrcA);
            Operand second = GetSrcReg(context, inst.SrcB);
            Operand tested = GetSrcReg(context, inst.SrcC);

            EmitFcmp(context, inst.FComp, first, second, tested, inst.Dest);
        }

        /// <summary>Fcmp where the second operand is a 20-bit immediate.</summary>
        public static void FcmpI(EmitterContext context)
        {
            InstFcmpI inst = context.GetOp<InstFcmpI>();

            Operand first = GetSrcReg(context, inst.SrcA);
            Operand second = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
            Operand tested = GetSrcReg(context, inst.SrcC);

            EmitFcmp(context, inst.FComp, first, second, tested, inst.Dest);
        }

        /// <summary>Fcmp where the second operand comes from a constant buffer.</summary>
        public static void FcmpC(EmitterContext context)
        {
            InstFcmpC inst = context.GetOp<InstFcmpC>();

            Operand first = GetSrcReg(context, inst.SrcA);
            Operand second = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
            Operand tested = GetSrcReg(context, inst.SrcC);

            EmitFcmp(context, inst.FComp, first, second, tested, inst.Dest);
        }

        /// <summary>
        /// Fcmp variant where the SrcC register supplies the second operand and the
        /// constant buffer supplies the value that is tested against zero.
        /// </summary>
        public static void FcmpRc(EmitterContext context)
        {
            InstFcmpRc inst = context.GetOp<InstFcmpRc>();

            Operand first = GetSrcReg(context, inst.SrcA);
            Operand second = GetSrcReg(context, inst.SrcC);
            Operand tested = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

            EmitFcmp(context, inst.FComp, first, second, tested, inst.Dest);
        }

        /// <summary>Fset with a register second operand (FP32).</summary>
        public static void FsetR(EmitterContext context)
        {
            InstFsetR inst = context.GetOp<InstFsetR>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcReg(context, inst.SrcB);

            EmitFset(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.BVal, inst.WriteCC);
        }

        /// <summary>Fset with a constant-buffer second operand (FP32).</summary>
        public static void FsetC(EmitterContext context)
        {
            InstFsetC inst = context.GetOp<InstFsetC>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

            EmitFset(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.BVal, inst.WriteCC);
        }

        /// <summary>Fset with a 20-bit immediate second operand (FP32).</summary>
        public static void FsetI(EmitterContext context)
        {
            InstFsetI inst = context.GetOp<InstFsetI>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

            EmitFset(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.BVal, inst.WriteCC);
        }

        /// <summary>Fsetp with a register second operand (FP32).</summary>
        public static void FsetpR(EmitterContext context)
        {
            InstFsetpR inst = context.GetOp<InstFsetpR>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcReg(context, inst.SrcB);

            EmitFsetp(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.WriteCC);
        }

        /// <summary>Fsetp with a 20-bit immediate second operand (FP32).</summary>
        public static void FsetpI(EmitterContext context)
        {
            InstFsetpI inst = context.GetOp<InstFsetpI>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));

            EmitFsetp(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.WriteCC);
        }

        /// <summary>Fsetp with a constant-buffer second operand (FP32).</summary>
        public static void FsetpC(EmitterContext context)
        {
            InstFsetpC inst = context.GetOp<InstFsetpC>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

            EmitFsetp(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB,
                inst.WriteCC);
        }

        /// <summary>Hset2 with a register second operand.</summary>
        public static void Hset2R(EmitterContext context)
        {
            InstHset2R inst = context.GetOp<InstHset2R>();

            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
            Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);

            EmitHset2(
                context, inst.Cmp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest, inst.Bval);
        }

        /// <summary>Hset2 with a pair of half immediates as second operand.</summary>
        public static void Hset2I(EmitterContext context)
        {
            InstHset2I inst = context.GetOp<InstHset2I>();

            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
            Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

            EmitHset2(
                context, inst.Cmp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest, inst.Bval);
        }

        /// <summary>
        /// Hset2 with a constant-buffer second operand; the cbuf value is read with the F32
        /// swizzle and never has the absolute modifier applied.
        /// </summary>
        public static void Hset2C(EmitterContext context)
        {
            InstHset2C inst = context.GetOp<InstHset2C>();

            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
            Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, false);

            EmitHset2(
                context, inst.Cmp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.Dest, inst.Bval);
        }

        /// <summary>Hsetp2 with a register second operand (comparison comes from FComp2).</summary>
        public static void Hsetp2R(EmitterContext context)
        {
            InstHsetp2R inst = context.GetOp<InstHsetp2R>();

            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
            Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);

            EmitHsetp2(
                context, inst.FComp2, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv, inst.HAnd);
        }

        /// <summary>Hsetp2 with a pair of half immediates as second operand.</summary>
        public static void Hsetp2I(EmitterContext context)
        {
            InstHsetp2I inst = context.GetOp<InstHsetp2I>();

            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
            Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);

            EmitHsetp2(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv, inst.HAnd);
        }

        /// <summary>Hsetp2 with a constant-buffer second operand, read with the F32 swizzle.</summary>
        public static void Hsetp2C(EmitterContext context)
        {
            InstHsetp2C inst = context.GetOp<InstHsetp2C>();

            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
            Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, inst.AbsB);

            EmitHsetp2(
                context, inst.FComp, inst.Bop, lhs, rhs,
                inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv, inst.HAnd);
        }

        /// <summary>
        /// Compares <paramref name="srcC"/> against floating-point zero using <paramref name="cmpOp"/>
        /// and copies the result of a ConditionalSelect between <paramref name="srcA"/> and
        /// <paramref name="srcB"/> into GPR <paramref name="rd"/>.
        /// </summary>
        private static void EmitFcmp(EmitterContext context, FComp cmpOp, Operand srcA, Operand srcB, Operand srcC, int rd)
        {
            Operand condition = GetFPComparison(context, cmpOp, srcC, ConstF(0));
            Operand selected = context.ConditionalSelect(condition, srcA, srcB);

            context.Copy(GetDest(rd), selected);
        }

        /// <summary>
        /// Shared body of the Fset/Dset emitters: applies abs/neg modifiers, compares the operands,
        /// combines the comparison with the source predicate through <paramref name="logicOp"/>, and
        /// writes the result to GPR <paramref name="rd"/>.
        /// </summary>
        /// <param name="boolFloat">
        /// When true the result is converted with ConditionalSelect(res, ConstF(1), Const(0)) before
        /// being stored, and the FP flag helper is used; otherwise the raw result is stored.
        /// </param>
        /// <param name="isFP64">Selects Instruction.FP64 instead of Instruction.FP32 for the comparison.</param>
        private static void EmitFset(
            EmitterContext context,
            FComp cmpOp,
            BoolOp logicOp,
            Operand srcA,
            Operand srcB,
            int srcPred,
            bool srcPredInv,
            int rd,
            bool absoluteA,
            bool absoluteB,
            bool negateA,
            bool negateB,
            bool boolFloat,
            bool writeCC,
            bool isFP64 = false)
        {
            Instruction fpType = Instruction.FP32;

            if (isFP64)
            {
                fpType = Instruction.FP64;
            }

            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

            Operand comparison = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
            Operand pred = GetPredicate(context, srcPred, srcPredInv);
            Operand combined = GetPredLogicalOp(context, logicOp, comparison, pred);

            Operand dest = GetDest(rd);

            if (!boolFloat)
            {
                // Integer-style result: store as-is and update the integer Z/N flags.
                context.Copy(dest, combined);

                SetZnFlags(context, combined, writeCC, extended: false);

                return;
            }

            Operand asFloat = context.ConditionalSelect(combined, ConstF(1), Const(0));

            context.Copy(dest, asFloat);

            SetFPZnFlags(context, asFloat, writeCC);
        }

        /// <summary>
        /// Shared body of the Fsetp/Dsetp emitters: writes the predicate-combined comparison to
        /// predicate <paramref name="destPred"/> and the predicate-combined bitwise NOT of the
        /// comparison to predicate <paramref name="destPredInv"/>.
        /// </summary>
        /// <remarks><paramref name="writeCC"/> is accepted but not read by this method.</remarks>
        private static void EmitFsetp(
            EmitterContext context,
            FComp cmpOp,
            BoolOp logicOp,
            Operand srcA,
            Operand srcB,
            int srcPred,
            bool srcPredInv,
            int destPred,
            int destPredInv,
            bool absoluteA,
            bool absoluteB,
            bool negateA,
            bool negateB,
            bool writeCC,
            bool isFP64 = false)
        {
            Instruction fpType = Instruction.FP32;

            if (isFP64)
            {
                fpType = Instruction.FP64;
            }

            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

            Operand direct = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
            Operand inverse = context.BitwiseNot(direct);
            Operand pred = GetPredicate(context, srcPred, srcPredInv);

            direct = GetPredLogicalOp(context, logicOp, direct, pred);
            inverse = GetPredLogicalOp(context, logicOp, inverse, pred);

            context.Copy(Register(destPred, RegisterType.Predicate), direct);
            context.Copy(Register(destPredInv, RegisterType.Predicate), inverse);
        }

        /// <summary>
        /// Shared body of the Hset2 emitters: compares element 0 and element 1 of the source pairs
        /// independently, combines each with the source predicate, and packs both results into
        /// GPR <paramref name="rd"/>.
        /// </summary>
        /// <param name="boolFloat">
        /// When true each result is turned into ConstF(1)/Const(0) and packed with PackHalf2x16;
        /// otherwise the first result is masked to 16 bits and OR-ed with the second shifted left by 16.
        /// </param>
        private static void EmitHset2(
            EmitterContext context,
            FComp cmpOp,
            BoolOp logicOp,
            Operand[] srcA,
            Operand[] srcB,
            int srcPred,
            bool srcPredInv,
            int rd,
            bool boolFloat)
        {
            Operand lowRes = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
            Operand highRes = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);

            Operand pred = GetPredicate(context, srcPred, srcPredInv);

            lowRes = GetPredLogicalOp(context, logicOp, lowRes, pred);
            highRes = GetPredLogicalOp(context, logicOp, highRes, pred);

            Operand packed;

            if (boolFloat)
            {
                Operand lowFloat = context.ConditionalSelect(lowRes, ConstF(1), Const(0));
                Operand highFloat = context.ConditionalSelect(highRes, ConstF(1), Const(0));

                packed = context.PackHalf2x16(lowFloat, highFloat);
            }
            else
            {
                Operand lowBits = context.BitwiseAnd(lowRes, Const(0xffff));
                Operand highBits = context.ShiftLeft(highRes, Const(16));

                packed = context.BitwiseOr(lowBits, highBits);
            }

            context.Copy(GetDest(rd), packed);
        }

        /// <summary>
        /// Shared body of the Hsetp2 emitters: compares both elements of the source pairs and writes
        /// the predicate-combined results to the two destination predicates.
        /// </summary>
        /// <param name="hAnd">
        /// When true the first result becomes the AND of both comparisons and the second becomes
        /// its bitwise NOT; otherwise each predicate receives its own element's comparison.
        /// </param>
        private static void EmitHsetp2(
            EmitterContext context,
            FComp cmpOp,
            BoolOp logicOp,
            Operand[] srcA,
            Operand[] srcB,
            int srcPred,
            bool srcPredInv,
            int destPred,
            int destPredInv,
            bool hAnd)
        {
            Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
            Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);

            if (hAnd)
            {
                first = context.BitwiseAnd(first, second);
                second = context.BitwiseNot(first);
            }

            Operand pred = GetPredicate(context, srcPred, srcPredInv);

            first = GetPredLogicalOp(context, logicOp, first, pred);
            second = GetPredLogicalOp(context, logicOp, second, pred);

            context.Copy(Register(destPred, RegisterType.Predicate), first);
            context.Copy(Register(destPredInv, RegisterType.Predicate), second);
        }

        /// <summary>
        /// Builds the IR for a floating-point comparison described by <paramref name="cond"/>.
        /// </summary>
        /// <param name="cond">
        /// T/F yield constants; Nan/Num test whether either operand is NaN (Num negates that test);
        /// any other value selects a compare instruction, and if the FComp.Nan bit is set the
        /// result is additionally OR-ed with the NaN tests of both operands.
        /// </param>
        /// <param name="fpType">Instruction.FP32 or Instruction.FP64, OR-ed into the compare instruction.</param>
        /// <exception cref="ArgumentException">The condition does not map to a known comparison.</exception>
        private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB, Instruction fpType = Instruction.FP32)
        {
            switch (cond)
            {
                case FComp.T:
                    return Const(IrConsts.True);

                case FComp.F:
                    return Const(IrConsts.False);

                case FComp.Nan:
                case FComp.Num:
                    {
                        Operand nanA = context.IsNan(srcA, fpType);
                        Operand nanB = context.IsNan(srcB, fpType);
                        Operand eitherNan = context.BitwiseOr(nanA, nanB);

                        return cond == FComp.Num ? context.BitwiseNot(eitherNan) : eitherNan;
                    }
            }

            // Strip the Nan bit to find the base comparison; the bit itself is handled below.
            Instruction inst = (cond & ~FComp.Nan) switch
            {
                FComp.Lt => Instruction.CompareLess,
                FComp.Eq => Instruction.CompareEqual,
                FComp.Le => Instruction.CompareLessOrEqual,
                FComp.Gt => Instruction.CompareGreater,
                FComp.Ne => Instruction.CompareNotEqual,
                FComp.Ge => Instruction.CompareGreaterOrEqual,
                _ => throw new ArgumentException($"Unexpected condition \"{cond}\".")
            };

            Operand res = context.Add(inst | fpType, Local(), srcA, srcB);

            if ((cond & FComp.Nan) == 0)
            {
                return res;
            }

            res = context.BitwiseOr(res, context.IsNan(srcA, fpType));
            res = context.BitwiseOr(res, context.IsNan(srcB, fpType));

            return res;
        }
    }
}
// Source file: src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the Dmnmx (FP64) and Fmnmx (FP32) instructions. Every entry point decodes
    /// the current op, loads both sources and the selecting predicate, then calls EmitFmnmx.
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>Dmnmx with a register second operand.</summary>
        public static void DmnmxR(EmitterContext context)
        {
            InstDmnmxR inst = context.GetOp<InstDmnmxR>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitFmnmx(
                context, lhs, rhs, selector, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC,
                isFP64: true);
        }

        /// <summary>Dmnmx with a 20-bit immediate second operand.</summary>
        public static void DmnmxI(EmitterContext context)
        {
            InstDmnmxI inst = context.GetOp<InstDmnmxI>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitFmnmx(
                context, lhs, rhs, selector, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC,
                isFP64: true);
        }

        /// <summary>Dmnmx with a constant-buffer second operand.</summary>
        public static void DmnmxC(EmitterContext context)
        {
            InstDmnmxC inst = context.GetOp<InstDmnmxC>();

            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitFmnmx(
                context, lhs, rhs, selector, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC,
                isFP64: true);
        }

        /// <summary>Fmnmx with a register second operand.</summary>
        public static void FmnmxR(EmitterContext context)
        {
            InstFmnmxR inst = context.GetOp<InstFmnmxR>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcReg(context, inst.SrcB);
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitFmnmx(
                context, lhs, rhs, selector, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC);
        }

        /// <summary>Fmnmx with a 20-bit immediate second operand.</summary>
        public static void FmnmxI(EmitterContext context)
        {
            InstFmnmxI inst = context.GetOp<InstFmnmxI>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitFmnmx(
                context, lhs, rhs, selector, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC);
        }

        /// <summary>Fmnmx with a constant-buffer second operand.</summary>
        public static void FmnmxC(EmitterContext context)
        {
            InstFmnmxC inst = context.GetOp<InstFmnmxC>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitFmnmx(
                context, lhs, rhs, selector, inst.Dest,
                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC);
        }

        /// <summary>
        /// Computes both FPMinimum and FPMaximum of the (abs/neg-modified) operands, picks one with
        /// a ConditionalSelect on <paramref name="srcPred"/> (minimum is the first choice, maximum
        /// the second), stores it to <paramref name="rd"/> and updates the FP Z/N flags.
        /// </summary>
        /// <param name="isFP64">Selects Instruction.FP64 instead of Instruction.FP32 and a 64-bit destination write.</param>
        private static void EmitFmnmx(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcPred,
            int rd,
            bool absoluteA,
            bool absoluteB,
            bool negateA,
            bool negateB,
            bool writeCC,
            bool isFP64 = false)
        {
            Instruction fpType = Instruction.FP32;

            if (isFP64)
            {
                fpType = Instruction.FP64;
            }

            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

            Operand minimum = context.FPMinimum(lhs, rhs, fpType);
            Operand maximum = context.FPMaximum(lhs, rhs, fpType);

            Operand chosen = context.ConditionalSelect(srcPred, minimum, maximum);

            SetDest(context, chosen, rd, isFP64);

            SetFPZnFlags(context, chosen, writeCC, fpType);
        }
    }
}
// Source file: src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using System.Linq;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the flow-control instructions handled in this file (branches, calls,
    /// exit/return, discard, and the push/sync family).
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>Emits a (possibly conditional) branch to the last successor of the current block.</summary>
        public static void Bra(EmitterContext context)
        {
            // The decoded fields are not used here; the call is kept so behaviour is unchanged.
            context.GetOp<InstBra>();

            EmitBranch(context, context.CurrBlock.Successors[^1].Address);
        }

        /// <summary>Emits Brk through the shared Brk/Cont/Sync path.</summary>
        public static void Brk(EmitterContext context)
        {
            context.GetOp<InstBrk>();

            EmitBrkContSync(context);
        }

        /// <summary>
        /// Emits an indirect branch. The runtime target is SrcA plus the op's absolute address, and
        /// it is matched against the addresses of the current block's successors (skipping the
        /// fall-through successor when the block has one).
        /// </summary>
        public static void Brx(EmitterContext context)
        {
            InstBrx op = context.GetOp<InstBrx>();
            InstOp currOp = context.CurrOp;
            int startIndex = context.CurrBlock.HasNext() ? 1 : 0;

            if (context.CurrBlock.Successors.Count <= startIndex)
            {
                context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{currOp.Address:X}.");
                return;
            }

            int offset = (int)currOp.GetAbsoluteAddress();

            Operand address = context.IAdd(Register(op.SrcA, RegisterType.Gpr), Const(offset));

            // Sort the candidate targets once and reuse the result below, instead of
            // re-running Skip + OrderBy every time the sequence is enumerated.
            Block[] sortedTargets = context.CurrBlock.Successors
                .Skip(startIndex)
                .OrderBy(x => x.Address)
                .ToArray();

            int total = sortedTargets.Length;

            // Every target except the last (highest address) one must have a single predecessor
            // and be located after the current block for the chained form to be usable.
            bool allTargetsSinglePred = true;

            for (int index = 0; index < total - 1; index++)
            {
                Block target = sortedTargets[index];

                if (target.Predecessors.Count > 1 || target.Address <= context.CurrBlock.Address)
                {
                    allTargetsSinglePred = false;
                    break;
                }
            }

            if (allTargetsSinglePred)
            {
                // Chain blocks, each target block will check if the BRX target address
                // matches its own address, if not, it jumps to the next target which will do the same check,
                // until it reaches the last possible target, which is executed unconditionally.
                // We can only do this if the BRX block is the only predecessor of all target blocks.
                // Additionally, this is not supported for blocks located before the current block,
                // since it will be too late to insert a label, but this is something that can be improved
                // in the future if necessary.

                Block currentTarget = null;
                ulong firstTargetAddress = 0;

                foreach (Block nextTarget in sortedTargets)
                {
                    if (currentTarget == null)
                    {
                        firstTargetAddress = nextTarget.Address;
                    }
                    else if (currentTarget.Address != nextTarget.Address)
                    {
                        context.SetBrxTarget(currentTarget.Address, address, (int)currentTarget.Address, nextTarget.Address);
                    }

                    currentTarget = nextTarget;
                }

                context.Branch(context.GetLabel(firstTargetAddress));
            }
            else
            {
                // Emit the branches sequentially.
                // This generates slightly worse code, but should work for all cases.

                ulong lastTargetAddress = ulong.MaxValue;
                int count = 0;

                // OrderByDescending is a stable sort, so applying it to the ascending array yields
                // the same sequence as applying it to the unsorted successor list.
                foreach (Block target in sortedTargets.OrderByDescending(x => x.Address))
                {
                    Operand label = context.GetLabel(target.Address);

                    if (++count < total)
                    {
                        // Skip duplicate addresses, a compare for them was already emitted.
                        if (target.Address != lastTargetAddress)
                        {
                            context.BranchIfTrue(label, context.ICompareEqual(address, Const((int)target.Address)));
                        }

                        lastTargetAddress = target.Address;
                    }
                    else
                    {
                        // Last candidate: taken unconditionally.
                        context.Branch(label);
                    }
                }
            }
        }

        /// <summary>
        /// Emits a call to the function at the op's absolute address. Compiler-generated functions of
        /// type BuiltInFSIBegin/BuiltInFSIEnd are replaced by FSIBegin/FSIEnd; other compiler-generated
        /// function types emit nothing.
        /// </summary>
        public static void Cal(EmitterContext context)
        {
            context.GetOp<InstCal>();

            DecodedFunction function = context.Program.GetFunctionByAddress(context.CurrOp.GetAbsoluteAddress());

            if (!function.IsCompilerGenerated)
            {
                context.Call(function.Id, false);
                return;
            }

            switch (function.Type)
            {
                case FunctionType.BuiltInFSIBegin:
                    context.FSIBegin();
                    break;
                case FunctionType.BuiltInFSIEnd:
                    context.FSIEnd();
                    break;
            }
        }

        /// <summary>Emits Cont through the shared Brk/Cont/Sync path.</summary>
        public static void Cont(EmitterContext context)
        {
            context.GetOp<InstCont>();

            EmitBrkContSync(context);
        }

        /// <summary>
        /// Emits a return for Exit on the main function, optionally guarded by the op's condition
        /// code. On a non-main function the instruction is logged as invalid and ignored.
        /// </summary>
        public static void Exit(EmitterContext context)
        {
            InstExit op = context.GetOp<InstExit>();

            if (context.IsNonMain)
            {
                context.Config.GpuAccessor.Log("Invalid exit on non-main function.");
                return;
            }

            if (op.Ccc == Ccc.T)
            {
                context.Return();
                return;
            }

            Operand cond = GetCondition(context, op.Ccc, IrConsts.False);

            // If the condition is always false, we don't need to do anything.
            if (cond.Type != OperandType.Constant || cond.Value != IrConsts.False)
            {
                Operand lblSkip = Label();
                context.BranchIfFalse(lblSkip, cond);
                context.Return();
                context.MarkLabel(lblSkip);
            }
        }

        /// <summary>Emits a discard.</summary>
        public static void Kil(EmitterContext context)
        {
            context.GetOp<InstKil>();

            context.Discard();
        }

        /// <summary>Emits Pbk through the shared Pbk/Pcnt/Ssy path.</summary>
        public static void Pbk(EmitterContext context)
        {
            context.GetOp<InstPbk>();

            EmitPbkPcntSsy(context);
        }

        /// <summary>Emits Pcnt through the shared Pbk/Pcnt/Ssy path.</summary>
        public static void Pcnt(EmitterContext context)
        {
            context.GetOp<InstPcnt>();

            EmitPbkPcntSsy(context);
        }

        /// <summary>
        /// Emits a return on a non-main function. On the main function the instruction is logged
        /// as invalid and ignored.
        /// </summary>
        public static void Ret(EmitterContext context)
        {
            context.GetOp<InstRet>();

            if (context.IsNonMain)
            {
                context.Return();
            }
            else
            {
                context.Config.GpuAccessor.Log("Invalid return on main function.");
            }
        }

        /// <summary>Emits Ssy through the shared Pbk/Pcnt/Ssy path.</summary>
        public static void Ssy(EmitterContext context)
        {
            context.GetOp<InstSsy>();

            EmitPbkPcntSsy(context);
        }

        /// <summary>Emits Sync through the shared Brk/Cont/Sync path.</summary>
        public static void Sync(EmitterContext context)
        {
            context.GetOp<InstSync>();

            EmitBrkContSync(context);
        }

        /// <summary>
        /// For the push op at the current address, stores that push op's id into the local
        /// associated with each consumer block.
        /// </summary>
        private static void EmitPbkPcntSsy(EmitterContext context)
        {
            var consumers = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == context.CurrOp.Address).Consumers;

            foreach (KeyValuePair<Block, Operand> kv in consumers)
            {
                Block consumerBlock = kv.Key;
                Operand local = kv.Value;

                int id = consumerBlock.SyncTargets[context.CurrOp.Address].PushOpId;

                context.Copy(local, Const(id));
            }
        }

        /// <summary>
        /// Emits the jump for Brk/Cont/Sync using the current block's sync targets: a plain branch
        /// when there is exactly one target, otherwise one compare-and-branch per target keyed on
        /// the push op id stored in the consumer local.
        /// </summary>
        private static void EmitBrkContSync(EmitterContext context)
        {
            var targets = context.CurrBlock.SyncTargets;

            if (targets.Count == 1)
            {
                // If we have only one target, then the SSY/PBK is basically
                // a branch, we can produce better codegen for this case.
                EmitBranch(context, targets.Values.First().PushOpInfo.Op.GetAbsoluteAddress());
                return;
            }

            // TODO: Support CC here as well (condition).
            foreach (SyncTarget target in targets.Values)
            {
                PushOpInfo pushOpInfo = target.PushOpInfo;

                Operand label = context.GetLabel(pushOpInfo.Op.GetAbsoluteAddress());
                Operand local = pushOpInfo.Consumers[context.CurrBlock];

                context.BranchIfTrue(label, context.ICompareEqual(local, Const(target.PushOpId)));
            }
        }

        /// <summary>
        /// Emits a branch to <paramref name="address"/>, honouring the predicate and condition code
        /// decoded from the current op's raw opcode. Nothing is emitted when the target is the
        /// instruction immediately following the current one.
        /// </summary>
        private static void EmitBranch(EmitterContext context, ulong address)
        {
            InstOp op = context.CurrOp;
            InstConditional opCond = new InstConditional(op.RawOpCode);

            // If we're branching to the next instruction, then the branch
            // is useless and we can ignore it.
            if (address == op.Address + 8)
            {
                return;
            }

            Operand label = context.GetLabel(address);

            Operand pred = Register(opCond.Pred, RegisterType.Predicate);

            if (opCond.Ccc != Ccc.T)
            {
                // A condition code is present: fold it together with the predicate (if any).
                Operand cond = GetCondition(context, opCond.Ccc);

                if (opCond.Pred == RegisterConsts.PredicateTrueIndex)
                {
                    pred = cond;
                }
                else if (opCond.PredInv)
                {
                    pred = context.BitwiseAnd(context.BitwiseNot(pred), cond);
                }
                else
                {
                    pred = context.BitwiseAnd(pred, cond);
                }

                context.BranchIfTrue(label, pred);
            }
            else if (opCond.Pred == RegisterConsts.PredicateTrueIndex)
            {
                context.Branch(label);
            }
            else if (opCond.PredInv)
            {
                context.BranchIfFalse(label, pred);
            }
            else
            {
                context.BranchIfTrue(label, pred);
            }
        }
    }
}
// Source file: src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.CompilerServices;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Operand-building helpers shared by the instruction emitters: flag and destination
    /// registers, source operand loading, half-float packing/unpacking and immediate conversion.
    /// </summary>
    static class InstEmitHelper
    {
        /// <summary>Returns flag register 0.</summary>
        public static Operand GetZF() => Register(0, RegisterType.Flag);

        /// <summary>Returns flag register 1.</summary>
        public static Operand GetNF() => Register(1, RegisterType.Flag);

        /// <summary>Returns flag register 2.</summary>
        public static Operand GetCF() => Register(2, RegisterType.Flag);

        /// <summary>Returns flag register 3.</summary>
        public static Operand GetVF() => Register(3, RegisterType.Flag);

        /// <summary>Returns GPR <paramref name="rd"/>.</summary>
        public static Operand GetDest(int rd) => Register(rd, RegisterType.Gpr);

        /// <summary>Returns GPR <c>rd | 1</c>, used as the second destination register of a pair.</summary>
        public static Operand GetDest2(int rd) => Register(rd | 1, RegisterType.Gpr);

        /// <summary>
        /// Returns a constant-buffer source. For FP64 the values at <paramref name="cbufOffset"/> and
        /// <c>cbufOffset + 1</c> are packed together with PackDouble2x32.
        /// </summary>
        public static Operand GetSrcCbuf(EmitterContext context, int cbufSlot, int cbufOffset, bool isFP64 = false)
        {
            if (!isFP64)
            {
                return Cbuf(cbufSlot, cbufOffset);
            }

            return context.PackDouble2x32(
                Cbuf(cbufSlot, cbufOffset),
                Cbuf(cbufSlot, cbufOffset + 1));
        }

        /// <summary>
        /// Returns an immediate source. For FP64 the immediate is packed as the second argument of
        /// PackDouble2x32, with zero as the first.
        /// </summary>
        public static Operand GetSrcImm(EmitterContext context, int imm, bool isFP64 = false)
        {
            if (!isFP64)
            {
                return Const(imm);
            }

            return context.PackDouble2x32(Const(0), Const(imm));
        }

        /// <summary>
        /// Returns a register source. For FP64 registers <paramref name="reg"/> and <c>reg | 1</c>
        /// are packed together with PackDouble2x32.
        /// </summary>
        public static Operand GetSrcReg(EmitterContext context, int reg, bool isFP64 = false)
        {
            if (!isFP64)
            {
                return Register(reg, RegisterType.Gpr);
            }

            return context.PackDouble2x32(Register(reg, RegisterType.Gpr), Register(reg | 1, RegisterType.Gpr));
        }

        /// <summary>Loads register <paramref name="ra"/> as a half pair using <paramref name="swizzle"/> and applies abs/neg.</summary>
        public static Operand[] GetHalfSrc(
            EmitterContext context,
            HalfSwizzle swizzle,
            int ra,
            bool negate,
            bool absolute)
        {
            Operand packed = GetSrcReg(context, ra);
            Operand[] unpacked = GetHalfUnpacked(context, packed, swizzle);

            return FPAbsNeg(context, unpacked, absolute, negate);
        }

        /// <summary>Loads a constant-buffer value as a half pair using <paramref name="swizzle"/> and applies abs/neg.</summary>
        public static Operand[] GetHalfSrc(
            EmitterContext context,
            HalfSwizzle swizzle,
            int cbufSlot,
            int cbufOffset,
            bool negate,
            bool absolute)
        {
            Operand packed = GetSrcCbuf(context, cbufSlot, cbufOffset);
            Operand[] unpacked = GetHalfUnpacked(context, packed, swizzle);

            return FPAbsNeg(context, unpacked, absolute, negate);
        }

        /// <summary>
        /// Builds a constant half pair from two immediates; each is shifted left by 6 and truncated
        /// to 16 bits before being reinterpreted as a half.
        /// </summary>
        public static Operand[] GetHalfSrc(EmitterContext context, int immH0, int immH1)
        {
            Operand first = HalfBitsToConstF((ushort)(immH0 << 6));
            Operand second = HalfBitsToConstF((ushort)(immH1 << 6));

            return new Operand[] { first, second };
        }

        /// <summary>
        /// Builds a constant half pair from the lower 16 bits (element 0) and upper 16 bits
        /// (element 1) of <paramref name="imm32"/>.
        /// </summary>
        public static Operand[] GetHalfSrc(EmitterContext context, int imm32)
        {
            Operand first = HalfBitsToConstF((ushort)imm32);
            Operand second = HalfBitsToConstF((ushort)(imm32 >> 16));

            return new Operand[] { first, second };
        }

        /// <summary>Reinterprets 16 raw bits as a <see cref="Half"/> and returns it as a float constant.</summary>
        private static Operand HalfBitsToConstF(ushort bits)
        {
            return ConstF((float)Unsafe.As<ushort, Half>(ref bits));
        }

        /// <summary>
        /// Applies the abs/neg modifiers to every element of <paramref name="operands"/> in place
        /// and returns the same array.
        /// </summary>
        public static Operand[] FPAbsNeg(EmitterContext context, Operand[] operands, bool abs, bool neg)
        {
            int i = 0;

            while (i < operands.Length)
            {
                operands[i] = context.FPAbsNeg(operands[i], abs, neg);
                i++;
            }

            return operands;
        }

        /// <summary>Splits <paramref name="src"/> into a two-element array according to <paramref name="swizzle"/>.</summary>
        /// <exception cref="ArgumentException">The swizzle is not one of the handled values.</exception>
        public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, HalfSwizzle swizzle)
        {
            return swizzle switch
            {
                HalfSwizzle.F16 => new Operand[]
                {
                    context.UnpackHalf2x16Low(src),
                    context.UnpackHalf2x16High(src)
                },
                // The value is used unchanged for both elements.
                HalfSwizzle.F32 => new Operand[] { src, src },
                HalfSwizzle.H0H0 => new Operand[]
                {
                    context.UnpackHalf2x16Low(src),
                    context.UnpackHalf2x16Low(src)
                },
                HalfSwizzle.H1H1 => new Operand[]
                {
                    context.UnpackHalf2x16High(src),
                    context.UnpackHalf2x16High(src)
                },
                _ => throw new ArgumentException($"Invalid swizzle \"{swizzle}\".")
            };
        }

        /// <summary>
        /// Combines a two-element result into a single operand according to <paramref name="swizzle"/>.
        /// The Mrg* formats keep one half of the current value of GPR <paramref name="rd"/>.
        /// </summary>
        /// <exception cref="ArgumentException">The format is not one of the handled values.</exception>
        public static Operand GetHalfPacked(EmitterContext context, OFmt swizzle, Operand[] results, int rd)
        {
            if (swizzle == OFmt.F16)
            {
                return context.PackHalf2x16(results[0], results[1]);
            }

            if (swizzle == OFmt.F32)
            {
                return results[0];
            }

            if (swizzle == OFmt.MrgH0)
            {
                // Replace the low half, keep the destination's existing high half.
                Operand keptHigh = GetHalfDest(context, rd, isHigh: true);

                return context.PackHalf2x16(results[0], keptHigh);
            }

            if (swizzle == OFmt.MrgH1)
            {
                // Replace the high half, keep the destination's existing low half.
                Operand keptLow = GetHalfDest(context, rd, isHigh: false);

                return context.PackHalf2x16(keptLow, results[1]);
            }

            throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
        }

        /// <summary>Unpacks the high or low half of GPR <paramref name="rd"/>.</summary>
        public static Operand GetHalfDest(EmitterContext context, int rd, bool isHigh)
        {
            return isHigh
                ? context.UnpackHalf2x16High(GetDest(rd))
                : context.UnpackHalf2x16Low(GetDest(rd));
        }

        /// <summary>Returns predicate register <paramref name="pred"/>, bitwise-negated when <paramref name="not"/> is set.</summary>
        public static Operand GetPredicate(EmitterContext context, int pred, bool not)
        {
            Operand predicate = Register(pred, RegisterType.Predicate);

            return not ? context.BitwiseNot(predicate) : predicate;
        }

        /// <summary>
        /// Writes <paramref name="value"/> to GPR <paramref name="rd"/>. For FP64 the value is split
        /// with UnpackDouble2x32Low/High into <c>rd</c> and <c>rd | 1</c>.
        /// </summary>
        public static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
        {
            if (!isFP64)
            {
                context.Copy(GetDest(rd), value);
                return;
            }

            context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
            context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
        }

        /// <summary>Sign-extends the low 16 bits of <paramref name="imm16"/>.</summary>
        public static int Imm16ToSInt(int imm16) => (short)imm16;

        /// <summary>Shifts a 20-bit immediate left by 12 so it occupies the upper bits of a 32-bit word.</summary>
        public static int Imm20ToFloat(int imm20) => imm20 << 12;

        /// <summary>Sign-extends the low 20 bits of <paramref name="imm20"/>.</summary>
        public static int Imm20ToSInt(int imm20) => (imm20 << 12) >> 12;

        /// <summary>Sign-extends the low 24 bits of <paramref name="imm24"/>.</summary>
        public static int Imm24ToSInt(int imm24) => (imm24 << 8) >> 8;

        /// <summary>Emits a signed bitfield extract of <paramref name="srcBits"/> bits starting at bit 0.</summary>
        public static Operand SignExtendTo32(EmitterContext context, Operand src, int srcBits)
        {
            Operand start = Const(0);
            Operand length = Const(srcBits);

            return context.BitfieldExtractS32(src, start, length);
        }

        /// <summary>Emits a bitwise AND of <paramref name="src"/> with a mask derived from <paramref name="srcBits"/>.</summary>
        public static Operand ZeroExtendTo32(EmitterContext context, Operand src, int srcBits)
        {
            uint lowBits = uint.MaxValue >> (32 - srcBits);

            return context.BitwiseAnd(src, Const((int)lowBits));
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Integer arithmetic emitters (IADD, IADD3, IMAD, IMUL, ISCADD, LEA, LEA.HI, XMAD).
    // Every public entry point fetches the decoded instruction from the context, loads its
    // sources from registers, an immediate or a constant buffer (depending on the encoding
    // suffix R/I/C/Rc/32i), and forwards to the shared Emit* helper for that operation.
    static partial class InstEmit
    {
        // ---- IADD: two-operand add ----

        public static void IaddR(EmitterContext context)
        {
            InstIaddR op = context.GetOp<InstIaddR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
        }

        public static void IaddI(EmitterContext context)
        {
            InstIaddI op = context.GetOp<InstIaddI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
        }

        public static void IaddC(EmitterContext context)
        {
            InstIaddC op = context.GetOp<InstIaddC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
        }

        public static void Iadd32i(EmitterContext context)
        {
            InstIadd32i op = context.GetOp<InstIadd32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);

            EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
        }

        // ---- IADD3: three-operand add ----

        public static void Iadd3R(EmitterContext context)
        {
            InstIadd3R op = context.GetOp<InstIadd3R>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIadd3(context, op.Lrs, srcA, srcB, srcC, op.Apart, op.Bpart, op.Cpart, op.Dest, op.NegA, op.NegB, op.NegC);
        }

        public static void Iadd3I(EmitterContext context)
        {
            InstIadd3I op = context.GetOp<InstIadd3I>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            var srcC = GetSrcReg(context, op.SrcC);

            // This encoding passes no shift mode or half selection: full 32-bit operands.
            EmitIadd3(context, Lrs.None, srcA, srcB, srcC, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
        }

        public static void Iadd3C(EmitterContext context)
        {
            InstIadd3C op = context.GetOp<InstIadd3C>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            // This encoding passes no shift mode or half selection: full 32-bit operands.
            EmitIadd3(context, Lrs.None, srcA, srcB, srcC, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
        }

        // ---- IMAD: multiply-add ----

        public static void ImadR(EmitterContext context)
        {
            InstImadR op = context.GetOp<InstImadR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        public static void ImadI(EmitterContext context)
        {
            InstImadI op = context.GetOp<InstImadI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            var srcC = GetSrcReg(context, op.SrcC);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        public static void ImadC(EmitterContext context)
        {
            InstImadC op = context.GetOp<InstImadC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        public static void ImadRc(EmitterContext context)
        {
            InstImadRc op = context.GetOp<InstImadRc>();

            // In the Rc encoding the second factor comes from the SrcC register field and
            // the addend comes from the constant buffer.
            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcC);
            var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        public static void Imad32i(EmitterContext context)
        {
            InstImad32i op = context.GetOp<InstImad32i>();

            // The 32-bit immediate form reads its addend from the destination register.
            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);
            var srcC = GetSrcReg(context, op.Dest);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        // ---- IMUL: emitted as a multiply-add with a constant zero addend ----

        public static void ImulR(EmitterContext context)
        {
            InstImulR op = context.GetOp<InstImulR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
        }

        public static void ImulI(EmitterContext context)
        {
            InstImulI op = context.GetOp<InstImulI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
        }

        public static void ImulC(EmitterContext context)
        {
            InstImulC op = context.GetOp<InstImulC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
        }

        public static void Imul32i(EmitterContext context)
        {
            InstImul32i op = context.GetOp<InstImul32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);

            EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
        }

        // ---- ISCADD: shift-left and add ----

        public static void IscaddR(EmitterContext context)
        {
            InstIscaddR op = context.GetOp<InstIscaddR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
        }

        public static void IscaddI(EmitterContext context)
        {
            InstIscaddI op = context.GetOp<InstIscaddI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
        }

        public static void IscaddC(EmitterContext context)
        {
            InstIscaddC op = context.GetOp<InstIscaddC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
        }

        public static void Iscadd32i(EmitterContext context)
        {
            InstIscadd32i op = context.GetOp<InstIscadd32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);

            // No negation mode is taken from this encoding.
            EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, AvgMode.NoNeg, op.WriteCC);
        }

        // ---- LEA / LEA.HI: shifted address computation ----

        public static void LeaR(EmitterContext context)
        {
            InstLeaR op = context.GetOp<InstLeaR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
        }

        public static void LeaI(EmitterContext context)
        {
            InstLeaI op = context.GetOp<InstLeaI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
        }

        public static void LeaC(EmitterContext context)
        {
            InstLeaC op = context.GetOp<InstLeaC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
        }

        public static void LeaHiR(EmitterContext context)
        {
            InstLeaHiR op = context.GetOp<InstLeaHiR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitLeaHi(context, srcA, srcB, srcC, op.Dest, op.NegA, op.ImmU5);
        }

        public static void LeaHiC(EmitterContext context)
        {
            InstLeaHiC op = context.GetOp<InstLeaHiC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitLeaHi(context, srcA, srcB, srcC, op.Dest, op.NegA, op.ImmU5);
        }

        // ---- XMAD: 16x16-bit multiply-add ----

        public static void XmadR(EmitterContext context)
        {
            InstXmadR op = context.GetOp<InstXmadR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
        }

        public static void XmadI(EmitterContext context)
        {
            InstXmadI op = context.GetOp<InstXmadI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm16);
            var srcC = GetSrcReg(context, op.SrcC);

            // The immediate is 16 bits wide, so the low half of B is always selected.
            EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, false, op.Psl, op.Mrg, op.X, op.WriteCC);
        }

        public static void XmadC(EmitterContext context)
        {
            InstXmadC op = context.GetOp<InstXmadC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
        }

        public static void XmadRc(EmitterContext context)
        {
            InstXmadRc op = context.GetOp<InstXmadRc>();

            // In the Rc encoding the second factor comes from the SrcC register field and
            // the addend comes from the constant buffer. Product shift and merge are not
            // taken from this encoding.
            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcC);
            var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, false, false, op.X, op.WriteCC);
        }

        // Emits rd = (+/-)srcA + (+/-)srcB, plus the carry flag when "extended" is set.
        // avgMode selects which operand (if any) is negated; flags are updated when writeCC is set.
        private static void EmitIadd(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            AvgMode avgMode,
            bool extended,
            bool writeCC)
        {
            srcA = context.INegate(srcA, avgMode == AvgMode.NegA);
            srcB = context.INegate(srcB, avgMode == AvgMode.NegB);

            Operand res = context.IAdd(srcA, srcB);

            if (extended)
            {
                // Carry-in: only bit 0 of the carry flag operand is added.
                res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
            }

            SetIaddFlags(context, res, srcA, srcB, writeCC, extended);

            // TODO: SAT.

            context.Copy(GetDest(rd), res);
        }

        // Emits rd = shift(a + b) + c, where each operand is first reduced to the selected
        // half (or kept as 32 bits) and optionally negated, and "mode" selects an optional
        // 16-bit left/right shift applied to the partial sum a + b.
        private static void EmitIadd3(
            EmitterContext context,
            Lrs mode,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            HalfSelect partA,
            HalfSelect partB,
            HalfSelect partC,
            int rd,
            bool negateA,
            bool negateB,
            bool negateC)
        {
            // Selects the requested 16-bit half of "src", zero-extended to 32 bits.
            // An unrecognized selection is logged and the operand is used unmodified.
            Operand Extend(Operand src, HalfSelect part)
            {
                if (part == HalfSelect.B32)
                {
                    return src;
                }

                if (part == HalfSelect.H0)
                {
                    return context.BitwiseAnd(src, Const(0xffff));
                }
                else if (part == HalfSelect.H1)
                {
                    return context.ShiftRightU32(src, Const(16));
                }
                else
                {
                    context.Config.GpuAccessor.Log($"Iadd3 has invalid component selection {part}.");
                }

                return src;
            }

            srcA = context.INegate(Extend(srcA, partA), negateA);
            srcB = context.INegate(Extend(srcB, partB), negateB);
            srcC = context.INegate(Extend(srcC, partC), negateC);

            Operand res = context.IAdd(srcA, srcB);

            if (mode != Lrs.None)
            {
                if (mode == Lrs.LeftShift)
                {
                    res = context.ShiftLeft(res, Const(16));
                }
                else if (mode == Lrs.RightShift)
                {
                    res = context.ShiftRightU32(res, Const(16));
                }
                else
                {
                    // Unknown mode: report it (as Extend does for bad half selections)
                    // and continue with the unshifted partial sum.
                    context.Config.GpuAccessor.Log($"Iadd3 has invalid shift mode {mode}.");
                }
            }

            res = context.IAdd(res, srcC);

            context.Copy(GetDest(rd), res);

            // TODO: CC, X, corner cases.
        }

        // Emits rd = srcA * srcB + srcC. When "high" is set, the upper 32 bits of the
        // 64-bit product are used instead of the lower 32 bits.
        private static void EmitImad(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            AvgMode avgMode,
            bool signedA,
            bool signedB,
            bool high)
        {
            // NegA negates one factor (srcB), which negates the product; NegB negates the addend.
            srcB = context.INegate(srcB, avgMode == AvgMode.NegA);
            srcC = context.INegate(srcC, avgMode == AvgMode.NegB);

            Operand res;

            if (high)
            {
                if (signedA && signedB)
                {
                    res = context.MultiplyHighS32(srcA, srcB);
                }
                else
                {
                    res = context.MultiplyHighU32(srcA, srcB);

                    // Mixed signedness: start from the unsigned high product, then add
                    // other * (signed >> 31), i.e. subtract the other operand when the
                    // signed operand is negative.
                    if (signedA)
                    {
                        res = context.IAdd(res, context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31))));
                    }
                    else if (signedB)
                    {
                        res = context.IAdd(res, context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31))));
                    }
                }
            }
            else
            {
                res = context.IMultiply(srcA, srcB);
            }

            // Skip the add when the addend is the constant zero (the IMUL case).
            if (srcC.Type != OperandType.Constant || srcC.Value != 0)
            {
                res = context.IAdd(res, srcC);
            }

            // TODO: CC, X, SAT, and more?

            context.Copy(GetDest(rd), res);
        }

        // Emits rd = (+/-)(srcA << shift) + (+/-)srcB and updates flags when writeCC is set.
        private static void EmitIscadd(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            int shift,
            AvgMode avgMode,
            bool writeCC)
        {
            srcA = context.ShiftLeft(srcA, Const(shift));

            srcA = context.INegate(srcA, avgMode == AvgMode.NegA);
            srcB = context.INegate(srcB, avgMode == AvgMode.NegB);

            Operand res = context.IAdd(srcA, srcB);

            SetIaddFlags(context, res, srcA, srcB, writeCC, false);

            context.Copy(GetDest(rd), res);
        }

        // Emits rd = (+/-)(srcA << shift) + srcB.
        public static void EmitLea(EmitterContext context, Operand srcA, Operand srcB, int rd, bool negateA, int shift)
        {
            srcA = context.ShiftLeft(srcA, Const(shift));
            srcA = context.INegate(srcA, negateA);

            Operand res = context.IAdd(srcA, srcB);

            context.Copy(GetDest(rd), res);

            // TODO: CC, X.
        }

        // Emits rd = high32((srcC:srcA) << shift) + srcB, where srcC:srcA is treated as a
        // 64-bit value (srcC high word, srcA low word) that is optionally negated.
        private static void EmitLeaHi(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool negateA,
            int shift)
        {
            Operand aLow = context.ShiftLeft(srcA, Const(shift));
            // A zero shift moves no bits of the low word into the high word; it is special
            // cased so that no right shift by 32 is emitted.
            Operand aHigh = shift == 0 ? Const(0) : context.ShiftRightU32(srcA, Const(32 - shift));
            aHigh = context.BitwiseOr(aHigh, context.ShiftLeft(srcC, Const(shift)));

            if (negateA)
            {
                // Perform 64-bit negation by doing bitwise not of the value,
                // then adding 1 and carrying over from low to high.
                aLow = context.BitwiseNot(aLow);
                aHigh = context.BitwiseNot(aHigh);

                aLow = AddWithCarry(context, aLow, Const(1), out Operand aLowCOut);
                aHigh = context.IAdd(aHigh, aLowCOut);
            }

            Operand res = context.IAdd(aHigh, srcB);

            context.Copy(GetDest(rd), res);

            // TODO: CC, X.
        }

        // Overload taking the XmadCop2 mode enum: maps it to the matching XmadCop value and
        // forwards to the main implementation. Unknown modes are logged and nothing is emitted.
        public static void EmitXmad(
            EmitterContext context,
            XmadCop2 mode,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool signedA,
            bool signedB,
            bool highA,
            bool highB,
            bool productShiftLeft,
            bool merge,
            bool extended,
            bool writeCC)
        {
            XmadCop modeConv;
            switch (mode)
            {
                case XmadCop2.Cfull:
                    modeConv = XmadCop.Cfull;
                    break;
                case XmadCop2.Clo:
                    modeConv = XmadCop.Clo;
                    break;
                case XmadCop2.Chi:
                    modeConv = XmadCop.Chi;
                    break;
                case XmadCop2.Csfu:
                    modeConv = XmadCop.Csfu;
                    break;
                default:
                    context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
                    return;
            }

            EmitXmad(context, modeConv, srcA, srcB, srcC, rd, signedA, signedB, highA, highB, productShiftLeft, merge, extended, writeCC);
        }

        // Emits rd = half(srcA) * half(srcB) + f(srcC), where the halves are selected by
        // highA/highB and sign- or zero-extended per signedA/signedB, the product is
        // optionally shifted left by 16, and "mode" selects how srcC is pre-processed.
        // With "extended" the carry flag is added as well; with "merge" the upper 16 bits
        // of the result are replaced by the low 16 bits of the original srcB.
        public static void EmitXmad(
            EmitterContext context,
            XmadCop mode,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool signedA,
            bool signedB,
            bool highA,
            bool highB,
            bool productShiftLeft,
            bool merge,
            bool extended,
            bool writeCC)
        {
            // Kept because the Cbcc mode and the merge step need B before half extraction.
            var srcBUnmodified = srcB;

            // Extracts the selected 16-bit half of "src" and extends it to 32 bits.
            Operand Extend16To32(Operand src, bool high, bool signed)
            {
                if (signed && high)
                {
                    return context.ShiftRightS32(src, Const(16));
                }
                else if (signed)
                {
                    return context.BitfieldExtractS32(src, Const(0), Const(16));
                }
                else if (high)
                {
                    return context.ShiftRightU32(src, Const(16));
                }
                else
                {
                    return context.BitwiseAnd(src, Const(0xffff));
                }
            }

            srcA = Extend16To32(srcA, highA, signedA);
            srcB = Extend16To32(srcB, highB, signedB);

            Operand res = context.IMultiply(srcA, srcB);

            if (productShiftLeft)
            {
                res = context.ShiftLeft(res, Const(16));
            }

            switch (mode)
            {
                case XmadCop.Cfull:
                    break;

                case XmadCop.Clo:
                    srcC = Extend16To32(srcC, high: false, signed: false);
                    break;
                case XmadCop.Chi:
                    srcC = Extend16To32(srcC, high: true, signed: false);
                    break;

                case XmadCop.Cbcc:
                    srcC = context.IAdd(srcC, context.ShiftLeft(srcBUnmodified, Const(16)));
                    break;

                case XmadCop.Csfu:
                    // Subtract (sign bit << 16) of each extended factor from the addend.
                    Operand signAdjustA = context.ShiftLeft(context.ShiftRightU32(srcA, Const(31)), Const(16));
                    Operand signAdjustB = context.ShiftLeft(context.ShiftRightU32(srcB, Const(31)), Const(16));

                    srcC = context.ISubtract(srcC, context.IAdd(signAdjustA, signAdjustB));
                    break;

                default:
                    context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
                    return;
            }

            Operand product = res;

            // The addend is always accumulated. Previously the extended path added only the
            // carry and dropped srcC, which disagreed with EmitIadd and with the flag
            // computation below (it treats the result as product + srcC + carry-in).
            // NOTE(review): behavior change for XMAD.X; not verified against hardware.
            res = context.IAdd(res, srcC);

            if (extended)
            {
                // Add with carry: only bit 0 of the carry flag operand is added.
                res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
            }

            SetIaddFlags(context, res, product, srcC, writeCC, extended);

            if (merge)
            {
                res = context.BitwiseAnd(res, Const(0xffff));
                res = context.BitwiseOr(res, context.ShiftLeft(srcBUnmodified, Const(16)));
            }

            context.Copy(GetDest(rd), res);
        }

        // Updates the carry, overflow, zero and negative flags for res = srcA + srcB
        // (+ carry-in when "extended"). Does nothing unless setCC is set.
        private static void SetIaddFlags(EmitterContext context, Operand res, Operand srcA, Operand srcB, bool setCC, bool extended)
        {
            if (!setCC)
            {
                return;
            }

            if (extended)
            {
                // C = (d == a && CIn) || d < a
                Operand tempC0 = context.ICompareEqual(res, srcA);
                Operand tempC1 = context.ICompareLessUnsigned(res, srcA);

                tempC0 = context.BitwiseAnd(tempC0, GetCF());

                context.Copy(GetCF(), context.BitwiseOr(tempC0, tempC1));
            }
            else
            {
                // C = d < a
                context.Copy(GetCF(), context.ICompareLessUnsigned(res, srcA));
            }

            // V = (d ^ a) & ~(a ^ b) < 0
            Operand tempV0 = context.BitwiseExclusiveOr(res, srcA);
            Operand tempV1 = context.BitwiseExclusiveOr(srcA, srcB);

            tempV1 = context.BitwiseNot(tempV1);

            Operand tempV = context.BitwiseAnd(tempV0, tempV1);

            context.Copy(GetVF(), context.ICompareLess(tempV, Const(0)));

            SetZnFlags(context, res, setCC: true, extended: extended);
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Integer comparison emitters (ICMP, ISET, ISETP).
    // Every public entry point fetches the decoded instruction from the context, loads its
    // sources from registers, an immediate or a constant buffer (depending on the encoding
    // suffix R/I/C/Rc), and forwards to the shared Emit* helper for that operation.
    static partial class InstEmit
    {
        // ---- ICMP: compare-and-select ----

        public static void IcmpR(EmitterContext context)
        {
            InstIcmpR op = context.GetOp<InstIcmpR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
        }

        public static void IcmpI(EmitterContext context)
        {
            InstIcmpI op = context.GetOp<InstIcmpI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
        }

        public static void IcmpC(EmitterContext context)
        {
            InstIcmpC op = context.GetOp<InstIcmpC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
        }

        public static void IcmpRc(EmitterContext context)
        {
            InstIcmpRc op = context.GetOp<InstIcmpRc>();

            // In the Rc encoding the second operand comes from the SrcC register field and
            // the third comes from the constant buffer.
            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcC);
            var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
        }

        // ---- ISET: compare and write the result to a register ----

        public static void IsetR(EmitterContext context)
        {
            InstIsetR op = context.GetOp<InstIsetR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
        }

        public static void IsetI(EmitterContext context)
        {
            InstIsetI op = context.GetOp<InstIsetI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
        }

        public static void IsetC(EmitterContext context)
        {
            InstIsetC op = context.GetOp<InstIsetC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
        }

        // ---- ISETP: compare and write the result to predicate registers ----

        public static void IsetpR(EmitterContext context)
        {
            InstIsetpR op = context.GetOp<InstIsetpR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
        }

        public static void IsetpI(EmitterContext context)
        {
            InstIsetpI op = context.GetOp<InstIsetpI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
        }

        public static void IsetpC(EmitterContext context)
        {
            InstIsetpC op = context.GetOp<InstIsetpC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
        }

        // Emits rd = (srcC <cmpOp> 0) ? srcA : srcB.
        private static void EmitIcmp(
            EmitterContext context,
            IComp cmpOp,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool isSigned)
        {
            Operand cmpRes = GetIntComparison(context, cmpOp, srcC, Const(0), isSigned);

            Operand res = context.ConditionalSelect(cmpRes, srcA, srcB);

            context.Copy(GetDest(rd), res);
        }

        // Emits rd = (srcA <cmpOp> srcB) <logicOp> predicate. With boolFloat the boolean is
        // converted to the float constant 1 (true) or the constant 0 (false) before it is
        // stored, and the floating-point flag helper is used instead of the integer one.
        private static void EmitIset(
            EmitterContext context,
            IComp cmpOp,
            BoolOp logicOp,
            Operand srcA,
            Operand srcB,
            int srcPred,
            bool srcPredInv,
            int rd,
            bool boolFloat,
            bool isSigned,
            bool extended,
            bool writeCC)
        {
            Operand res = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
            Operand pred = GetPredicate(context, srcPred, srcPredInv);

            res = GetPredLogicalOp(context, logicOp, res, pred);

            Operand dest = GetDest(rd);

            if (boolFloat)
            {
                res = context.ConditionalSelect(res, ConstF(1), Const(0));

                context.Copy(dest, res);

                SetFPZnFlags(context, res, writeCC);
            }
            else
            {
                context.Copy(dest, res);

                SetZnFlags(context, res, writeCC, extended);
            }
        }

        // Emits the comparison and its bitwise complement, combines each with the source
        // predicate through logicOp, and writes them to destPred and destPredInv respectively.
        private static void EmitIsetp(
            EmitterContext context,
            IComp cmpOp,
            BoolOp logicOp,
            Operand srcA,
            Operand srcB,
            int srcPred,
            bool srcPredInv,
            int destPred,
            int destPredInv,
            bool isSigned,
            bool extended)
        {
            Operand p0Res = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
            Operand p1Res = context.BitwiseNot(p0Res);
            Operand pred = GetPredicate(context, srcPred, srcPredInv);

            p0Res = GetPredLogicalOp(context, logicOp, p0Res, pred);
            p1Res = GetPredLogicalOp(context, logicOp, p1Res, pred);

            context.Copy(Register(destPred, RegisterType.Predicate), p0Res);
            context.Copy(Register(destPredInv, RegisterType.Predicate), p1Res);
        }

        // Dispatches to the extended (flag-chained) or plain comparison.
        private static Operand GetIntComparison(
            EmitterContext context,
            IComp cond,
            Operand srcA,
            Operand srcB,
            bool isSigned,
            bool extended)
        {
            return extended
                ? GetIntComparisonExtended(context, cond, srcA, srcB, isSigned)
                : GetIntComparison(context, cond, srcA, srcB, isSigned);
        }

        // Builds the comparison for the upper word of a multi-word compare: srcA/srcB are
        // the high words (xh/yh in the per-case formulas) and the outcome for the lower
        // words (xl/yl) is taken from the current Z and C flags.
        // Throws ArgumentException for a condition that is not handled.
        //
        // The previous version first emitted "srcA - srcB + ~CF" into the result and then
        // overwrote it in every case; that dead emission has been removed.
        private static Operand GetIntComparisonExtended(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
        {
            if (cond == IComp.T)
            {
                return Const(IrConsts.True);
            }

            if (cond == IComp.F)
            {
                return Const(IrConsts.False);
            }

            switch (cond)
            {
                case IComp.Eq: // r = xh == yh && xl == yl
                    return context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetZF());

                case IComp.Lt: // r = xh < yh || (xh == yh && xl < yl)
                    Operand notC = context.BitwiseNot(GetCF());
                    Operand prevLt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notC);
                    return isSigned
                        ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLt)
                        : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLt);

                case IComp.Le: // r = xh < yh || (xh == yh && xl <= yl)
                    Operand zOrNotC = context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
                    Operand prevLe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), zOrNotC);
                    return isSigned
                        ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLe)
                        : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLe);

                case IComp.Gt: // r = xh > yh || (xh == yh && xl > yl)
                    Operand notZAndC = context.BitwiseAnd(context.BitwiseNot(GetZF()), GetCF());
                    Operand prevGt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notZAndC);
                    return isSigned
                        ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGt)
                        : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGt);

                case IComp.Ge: // r = xh > yh || (xh == yh && xl >= yl)
                    Operand prevGe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetCF());
                    return isSigned
                        ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGe)
                        : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGe);

                case IComp.Ne: // r = xh != yh || xl != yl
                    return context.BitwiseOr(context.ICompareNotEqual(srcA, srcB), context.BitwiseNot(GetZF()));

                default:
                    throw new ArgumentException($"Unexpected condition \"{cond}\".");
            }
        }

        // Builds a plain 32-bit comparison of srcA and srcB. T and F yield constants; the
        // other conditions map to one comparison instruction, using the unsigned variant
        // unless isSigned is set (Eq and Ne use the same instruction either way).
        // Throws InvalidOperationException for a condition that is not handled.
        private static Operand GetIntComparison(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
        {
            Operand res;

            if (cond == IComp.T)
            {
                res = Const(IrConsts.True);
            }
            else if (cond == IComp.F)
            {
                res = Const(IrConsts.False);
            }
            else
            {
                var inst = cond switch
                {
                    IComp.Lt => Instruction.CompareLessU32,
                    IComp.Eq => Instruction.CompareEqual,
                    IComp.Le => Instruction.CompareLessOrEqualU32,
                    IComp.Gt => Instruction.CompareGreaterU32,
                    IComp.Ne => Instruction.CompareNotEqual,
                    IComp.Ge => Instruction.CompareGreaterOrEqualU32,
                    _ => throw new InvalidOperationException($"Unexpected condition \"{cond}\".")
                };

                if (isSigned)
                {
                    // Only the ordered comparisons have a distinct signed instruction.
                    switch (cond)
                    {
                        case IComp.Lt: inst = Instruction.CompareLess; break;
                        case IComp.Le: inst = Instruction.CompareLessOrEqual; break;
                        case IComp.Gt: inst = Instruction.CompareGreater; break;
                        case IComp.Ge: inst = Instruction.CompareGreaterOrEqual; break;
                    }
                }

                res = context.Add(inst, Local(), srcA, srcB);
            }

            return res;
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Integer logic emitters (LOP, LOP3).
    // Every public entry point fetches the decoded instruction from the context, loads its
    // sources from registers, an immediate or a constant buffer (depending on the encoding
    // suffix R/I/C/32i), and forwards to the shared Emit* helper for that operation.
    static partial class InstEmit
    {
        // Predicate index used when an encoding has no destination predicate;
        // EmitLopPredWrite performs no write for it.
        private const int PT = RegisterConsts.PredicateTrueIndex;

        // ---- LOP: two-input logic operation ----

        public static void LopR(EmitterContext context)
        {
            InstLopR op = context.GetOp<InstLopR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitLop(context, op.Lop, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
        }

        public static void LopI(EmitterContext context)
        {
            InstLopI op = context.GetOp<InstLopI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitLop(context, op.LogicOp, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
        }

        public static void LopC(EmitterContext context)
        {
            InstLopC op = context.GetOp<InstLopC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitLop(context, op.LogicOp, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
        }

        public static void Lop32i(EmitterContext context)
        {
            InstLop32i op = context.GetOp<InstLop32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);

            // No destination predicate is taken from this encoding.
            EmitLop(context, op.LogicOp, PredicateOp.F, srcA, srcB, op.Dest, PT, op.NegA, op.NegB, op.X, op.WriteCC);
        }

        // ---- LOP3: three-input logic operation defined by a truth table ----

        public static void Lop3R(EmitterContext context)
        {
            InstLop3R op = context.GetOp<InstLop3R>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitLop3(context, op.Imm, op.PredicateOp, srcA, srcB, srcC, op.Dest, op.DestPred, op.X, op.WriteCC);
        }

        public static void Lop3I(EmitterContext context)
        {
            InstLop3I op = context.GetOp<InstLop3I>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            var srcC = GetSrcReg(context, op.SrcC);

            // No destination predicate or X modifier is taken from this encoding.
            EmitLop3(context, op.Imm, PredicateOp.F, srcA, srcB, srcC, op.Dest, PT, false, op.WriteCC);
        }

        public static void Lop3C(EmitterContext context)
        {
            InstLop3C op = context.GetOp<InstLop3C>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            // No destination predicate or X modifier is taken from this encoding.
            EmitLop3(context, op.Imm, PredicateOp.F, srcA, srcB, srcC, op.Dest, PT, false, op.WriteCC);
        }

        // Emits rd = (~)srcA <logicOp> (~)srcB, writes the optional destination predicate,
        // and updates the Z/N flags when writeCC is set. Any logic op other than And, Or
        // and Xor passes the (possibly inverted) srcB through unchanged.
        private static void EmitLop(
            EmitterContext context,
            LogicOp logicOp,
            PredicateOp predOp,
            Operand srcA,
            Operand srcB,
            int rd,
            int destPred,
            bool invertA,
            bool invertB,
            bool extended,
            bool writeCC)
        {
            srcA = context.BitwiseNot(srcA, invertA);
            srcB = context.BitwiseNot(srcB, invertB);

            // The arms previously assigned to "res" inside its own initializer; the switch
            // expression value is sufficient.
            Operand res = logicOp switch
            {
                LogicOp.And => context.BitwiseAnd(srcA, srcB),
                LogicOp.Or => context.BitwiseOr(srcA, srcB),
                LogicOp.Xor => context.BitwiseExclusiveOr(srcA, srcB),
                _ => srcB
            };

            EmitLopPredWrite(context, res, predOp, destPred);

            context.Copy(GetDest(rd), res);

            SetZnFlags(context, res, writeCC, extended);
        }

        // Emits rd = f(srcA, srcB, srcC), where f is built from the instruction's truth
        // table immediate, writes the optional destination predicate, and updates the Z/N
        // flags when writeCC is set.
        private static void EmitLop3(
            EmitterContext context,
            int truthTable,
            PredicateOp predOp,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            int destPred,
            bool extended,
            bool writeCC)
        {
            Operand res = Lop3Expression.GetFromTruthTable(context, srcA, srcB, srcC, truthTable);

            EmitLopPredWrite(context, res, predOp, destPred);

            context.Copy(GetDest(rd), res);

            SetZnFlags(context, res, writeCC, extended);
        }

        // Writes the predicate derived from "result" to predicate register "pred":
        // constant false/true for F/T, result == 0 for Z, result != 0 for any other op.
        // Nothing is written when "pred" is the PT index.
        private static void EmitLopPredWrite(EmitterContext context, Operand result, PredicateOp predOp, int pred)
        {
            if (pred == PT)
            {
                return;
            }

            Operand pRes = predOp switch
            {
                PredicateOp.F => Const(IrConsts.False),
                PredicateOp.T => Const(IrConsts.True),
                PredicateOp.Z => context.ICompareEqual(result, Const(0)),
                // Remaining predicate op: non-zero test.
                _ => context.ICompareNotEqual(result, Const(0))
            };

            context.Copy(Register(pred, RegisterType.Predicate), pRes);
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Integer minimum/maximum emitters (IMNMX). The three entry points differ only in
    // where the second operand is loaded from: register, 20-bit immediate or constant buffer.
    static partial class InstEmit
    {
        public static void ImnmxR(EmitterContext context)
        {
            InstImnmxR inst = context.GetOp<InstImnmxR>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcReg(context, inst.SrcB);
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, inst.Dest, inst.Signed, inst.WriteCC);
        }

        public static void ImnmxI(EmitterContext context)
        {
            InstImnmxI inst = context.GetOp<InstImnmxI>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, inst.Dest, inst.Signed, inst.WriteCC);
        }

        public static void ImnmxC(EmitterContext context)
        {
            InstImnmxC inst = context.GetOp<InstImnmxC>();

            Operand lhs = GetSrcReg(context, inst.SrcA);
            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
            Operand selector = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, inst.Dest, inst.Signed, inst.WriteCC);
        }

        // Emits both the minimum and the maximum of the two sources (signed or unsigned),
        // stores the minimum when srcPred is true and the maximum otherwise, then updates
        // the Z/N flags when writeCC is set.
        private static void EmitImnmx(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcPred,
            int rd,
            bool isSignedInt,
            bool writeCC)
        {
            Operand minimum;
            Operand maximum;

            // The minimum is emitted before the maximum in both variants.
            if (isSignedInt)
            {
                minimum = context.IMinimumS32(srcA, srcB);
                maximum = context.IMaximumS32(srcA, srcB);
            }
            else
            {
                minimum = context.IMinimumU32(srcA, srcB);
                maximum = context.IMaximumU32(srcA, srcB);
            }

            Operand selected = context.ConditionalSelect(srcPred, minimum, maximum);

            context.Copy(GetDest(rd), selected);

            SetZnFlags(context, selected, writeCC);

            // TODO: X flags.
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Memory instruction emitters: atomics, constant buffer loads and
    /// local/shared/global memory loads and stores.
    /// </summary>
    static partial class InstEmit
    {
        // Memory regions handled by the shared EmitLoad/EmitStore helpers.
        private enum MemoryRegion
        {
            Local,
            Shared
        }

        /// <summary>Emits an atomic operation on global memory and copies the result to the destination register.</summary>
        public static void Atom(EmitterContext context)
        {
            InstAtom op = context.GetOp<InstAtom>();

            // Sign-extend the 20-bit immediate offset to 32 bits.
            int sOffset = (op.Imm20 << 12) >> 12;

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, sOffset);

            Operand value = GetSrcReg(context, op.SrcB);

            Operand res = EmitAtomicOp(context, StorageKind.GlobalMemory, op.Op, op.Size, addrLow, addrHigh, value);

            context.Copy(GetDest(op.Dest), res);
        }

        /// <summary>Emits an atomic operation on shared memory and copies the result to the destination register.</summary>
        public static void Atoms(EmitterContext context)
        {
            InstAtoms op = context.GetOp<InstAtoms>();

            Operand offset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2));

            // Sign-extend the 22-bit immediate offset to 32 bits.
            int sOffset = (op.Imm22 << 10) >> 10;

            offset = context.IAdd(offset, Const(sOffset));

            Operand value = GetSrcReg(context, op.SrcB);

            AtomSize size = op.AtomsSize switch
            {
                AtomsSize.S32 => AtomSize.S32,
                AtomsSize.U64 => AtomSize.U64,
                AtomsSize.S64 => AtomSize.S64,
                _ => AtomSize.U32
            };

            Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, offset, Const(0), value);

            context.Copy(GetDest(op.Dest), res);
        }

        /// <summary>Emits a constant buffer load of up to 64 bits into consecutive destination registers.</summary>
        public static void Ldc(EmitterContext context)
        {
            InstLdc op = context.GetOp<InstLdc>();

            if (op.LsSize > LsSize2.B64)
            {
                context.Config.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
                return;
            }

            bool isSmallInt = op.LsSize < LsSize2.B32;

            int count = op.LsSize == LsSize2.B64 ? 2 : 1;

            Operand slot = Const(op.CbufSlot);
            Operand srcA = GetSrcReg(context, op.SrcA);

            if (op.AddressMode == AddressMode.Is || op.AddressMode == AddressMode.Isl)
            {
                // In these modes the upper 16 bits of the register are added to the slot,
                // and only the lower 16 bits are used as the offset.
                slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
                srcA = context.BitwiseAnd(srcA, Const(0xffff));
            }

            Operand addr = context.IAdd(srcA, Const(Imm16ToSInt(op.CbufOffset)));
            Operand wordOffset = context.ShiftRightU32(addr, Const(2));
            Operand bitOffset = GetBitOffset(context, addr);

            for (int index = 0; index < count; index++)
            {
                Register dest = new Register(op.Dest + index, RegisterType.Gpr);

                if (dest.IsRZ)
                {
                    break;
                }

                Operand offset = context.IAdd(wordOffset, Const(index));
                Operand value = context.LoadConstant(slot, offset);

                if (isSmallInt)
                {
                    value = ExtractSmallInt(context, (LsSize)op.LsSize, bitOffset, value);
                }

                context.Copy(Register(dest), value);
            }
        }

        /// <summary>Emits a global memory load.</summary>
        public static void Ldg(EmitterContext context)
        {
            InstLdg op = context.GetOp<InstLdg>();

            EmitLdg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
        }

        /// <summary>Emits a local memory load.</summary>
        public static void Ldl(EmitterContext context)
        {
            InstLdl op = context.GetOp<InstLdl>();

            EmitLoad(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
        }

        /// <summary>Emits a shared memory load.</summary>
        public static void Lds(EmitterContext context)
        {
            InstLds op = context.GetOp<InstLds>();

            EmitLoad(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
        }

        /// <summary>Emits a global memory reduction (an atomic operation whose result is discarded).</summary>
        public static void Red(EmitterContext context)
        {
            InstRed op = context.GetOp<InstRed>();

            // NOTE(review): unlike Atom, Imm20 is passed here without explicit sign extension;
            // confirm whether InstRed.Imm20 is already sign-extended by the decoder.
            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, op.Imm20);

            EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, GetDest(op.SrcB));
        }

        /// <summary>Emits a global memory store.</summary>
        public static void Stg(EmitterContext context)
        {
            InstStg op = context.GetOp<InstStg>();

            EmitStg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
        }

        /// <summary>Emits a local memory store.</summary>
        public static void Stl(EmitterContext context)
        {
            InstStl op = context.GetOp<InstStl>();

            EmitStore(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
        }

        /// <summary>Emits a shared memory store.</summary>
        public static void Sts(EmitterContext context)
        {
            InstSts op = context.GetOp<InstSts>();

            EmitStore(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
        }

        /// <summary>
        /// Emits the IR atomic operation matching <paramref name="op"/> for 32-bit operands.
        /// </summary>
        /// <returns>
        /// The operand holding the atomic result, or a constant zero when the operation
        /// or operand type is not supported (a message is logged in that case).
        /// </returns>
        private static Operand EmitAtomicOp(
            EmitterContext context,
            StorageKind storageKind,
            AtomOp op,
            AtomSize type,
            Operand addrLow,
            Operand addrHigh,
            Operand value)
        {
            bool isSigned = type == AtomSize.S32;
            bool is32Bits = isSigned || type == AtomSize.U32;

            // Only 32-bit integer atomics are emitted; "res" stays null for any other type.
            Operand res = null;

            switch (op)
            {
                case AtomOp.Add:
                    if (is32Bits)
                    {
                        res = context.AtomicAdd(storageKind, addrLow, addrHigh, value);
                    }
                    break;
                case AtomOp.And:
                    if (is32Bits)
                    {
                        res = context.AtomicAnd(storageKind, addrLow, addrHigh, value);
                    }
                    break;
                case AtomOp.Xor:
                    if (is32Bits)
                    {
                        res = context.AtomicXor(storageKind, addrLow, addrHigh, value);
                    }
                    break;
                case AtomOp.Or:
                    if (is32Bits)
                    {
                        res = context.AtomicOr(storageKind, addrLow, addrHigh, value);
                    }
                    break;
                case AtomOp.Max:
                    if (is32Bits)
                    {
                        res = isSigned
                            ? context.AtomicMaxS32(storageKind, addrLow, addrHigh, value)
                            : context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
                    }
                    break;
                case AtomOp.Min:
                    if (is32Bits)
                    {
                        res = isSigned
                            ? context.AtomicMinS32(storageKind, addrLow, addrHigh, value)
                            : context.AtomicMinU32(storageKind, addrLow, addrHigh, value);
                    }
                    break;
                default:
                    // Previously this case silently produced zero; report it so that
                    // missing operations are visible in the logs.
                    context.Config.GpuAccessor.Log($"Unsupported atomic operation: {op}.");
                    return Const(0);
            }

            if (res is null)
            {
                context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
                return Const(0);
            }

            return res;
        }

        /// <summary>
        /// Emits a load from local or shared memory into <c>rd</c> and the following registers.
        /// </summary>
        private static void EmitLoad(
            EmitterContext context,
            MemoryRegion region,
            LsSize2 size,
            Operand srcA,
            int rd,
            int offset)
        {
            if (size > LsSize2.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
                return;
            }

            bool isSmallInt = size < LsSize2.B32;

            // Number of 32-bit registers written by the load.
            int count = size switch
            {
                LsSize2.B64 => 2,
                LsSize2.B128 => 4,
                _ => 1
            };

            Operand baseOffset = context.IAdd(srcA, Const(offset));
            Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
            Operand bitOffset = GetBitOffset(context, baseOffset);

            for (int index = 0; index < count; index++)
            {
                Register dest = new Register(rd + index, RegisterType.Gpr);

                if (dest.IsRZ)
                {
                    break;
                }

                Operand elemOffset = context.IAdd(wordOffset, Const(index));
                Operand value = null;

                switch (region)
                {
                    case MemoryRegion.Local: value = context.LoadLocal(elemOffset); break;
                    case MemoryRegion.Shared: value = context.LoadShared(elemOffset); break;
                }

                if (isSmallInt)
                {
                    value = ExtractSmallInt(context, (LsSize)size, bitOffset, value);
                }

                context.Copy(Register(dest), value);
            }
        }

        /// <summary>
        /// Emits a load from global memory into <c>rd</c> and the following registers.
        /// </summary>
        private static void EmitLdg(
            EmitterContext context,
            LsSize size,
            int ra,
            int rd,
            int offset,
            bool extended)
        {
            bool isSmallInt = size < LsSize.B32;

            int count = GetVectorCount(size);

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

            Operand bitOffset = GetBitOffset(context, addrLow);

            for (int index = 0; index < count; index++)
            {
                Register dest = new Register(rd + index, RegisterType.Gpr);

                if (dest.IsRZ)
                {
                    break;
                }

                Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);

                if (isSmallInt)
                {
                    value = ExtractSmallInt(context, size, bitOffset, value);
                }

                context.Copy(Register(dest), value);
            }
        }

        /// <summary>
        /// Emits a store of <c>rd</c> and the following registers to local or shared memory.
        /// </summary>
        private static void EmitStore(
            EmitterContext context,
            MemoryRegion region,
            LsSize2 size,
            Operand srcA,
            int rd,
            int offset)
        {
            if (size > LsSize2.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
                return;
            }

            bool isSmallInt = size < LsSize2.B32;

            // Number of 32-bit registers read by the store.
            int count = size switch
            {
                LsSize2.B64 => 2,
                LsSize2.B128 => 4,
                _ => 1
            };

            Operand baseOffset = context.IAdd(srcA, Const(offset));
            Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
            Operand bitOffset = GetBitOffset(context, baseOffset);

            for (int index = 0; index < count; index++)
            {
                // Register indices at or past the zero register read as RZ, matching how
                // the load path stops at RZ. The previous code fell back to "rd" here,
                // which stored the first register again when rd itself was not RZ.
                int srcIndex = rd + index;

                if (srcIndex >= RegisterConsts.RegisterZeroIndex)
                {
                    srcIndex = RegisterConsts.RegisterZeroIndex;
                }

                Operand value = Register(srcIndex, RegisterType.Gpr);
                Operand elemOffset = context.IAdd(wordOffset, Const(index));

                if (isSmallInt && region == MemoryRegion.Local)
                {
                    // Local memory is written one word at a time, so small values are
                    // merged into the existing word before it is stored back.
                    Operand word = context.LoadLocal(elemOffset);

                    value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
                }

                if (region == MemoryRegion.Local)
                {
                    context.StoreLocal(elemOffset, value);
                }
                else if (region == MemoryRegion.Shared)
                {
                    switch (size)
                    {
                        case LsSize2.U8:
                        case LsSize2.S8:
                            context.StoreShared8(baseOffset, value);
                            break;
                        case LsSize2.U16:
                        case LsSize2.S16:
                            context.StoreShared16(baseOffset, value);
                            break;
                        default:
                            context.StoreShared(elemOffset, value);
                            break;
                    }
                }
            }
        }

        /// <summary>
        /// Emits a store of <c>rd</c> and the following registers to global memory.
        /// </summary>
        private static void EmitStg(
            EmitterContext context,
            LsSize2 size,
            int ra,
            int rd,
            int offset,
            bool extended)
        {
            if (size > LsSize2.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
                return;
            }

            int count = GetVectorCount((LsSize)size);

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

            // NOTE(review): the result is unused here; the call is kept so that the
            // emitted operations are unchanged. Confirm whether it can be removed.
            Operand bitOffset = GetBitOffset(context, addrLow);

            for (int index = 0; index < count; index++)
            {
                // Register indices at or past the zero register read as RZ
                // (previously this fell back to "rd", see EmitStore).
                int srcIndex = rd + index;

                if (srcIndex >= RegisterConsts.RegisterZeroIndex)
                {
                    srcIndex = RegisterConsts.RegisterZeroIndex;
                }

                Operand value = Register(srcIndex, RegisterType.Gpr);

                Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));

                if (size == LsSize2.U8 || size == LsSize2.S8)
                {
                    context.StoreGlobal8(addrLowOffset, addrHigh, value);
                }
                else if (size == LsSize2.U16 || size == LsSize2.S16)
                {
                    context.StoreGlobal16(addrLowOffset, addrHigh, value);
                }
                else
                {
                    context.StoreGlobal(addrLowOffset, addrHigh, value);
                }
            }
        }

        /// <summary>Gets the number of 32-bit registers used by an access of the given size.</summary>
        private static int GetVectorCount(LsSize size)
        {
            switch (size)
            {
                case LsSize.B64:
                    return 2;
                case LsSize.B128:
                case LsSize.UB128:
                    return 4;
            }

            return 1;
        }

        /// <summary>
        /// Builds the (low, high) address pair from register <paramref name="ra"/> plus an immediate offset.
        /// </summary>
        /// <remarks>
        /// When <paramref name="extended"/> is set, the high part comes from the register following
        /// <paramref name="ra"/> (or zero for RZ) and the carry of the low addition is propagated to it.
        /// </remarks>
        private static (Operand, Operand) Get40BitsAddress(
            EmitterContext context,
            Register ra,
            bool extended,
            int offset)
        {
            Operand addrLow = Register(ra);
            Operand addrHigh;

            if (extended && !ra.IsRZ)
            {
                addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
            }
            else
            {
                addrHigh = Const(0);
            }

            Operand offs = Const(offset);

            addrLow = context.IAdd(addrLow, offs);

            if (extended)
            {
                // An unsigned sum smaller than one of its addends means the addition wrapped.
                Operand carry = context.ICompareLessUnsigned(addrLow, offs);

                addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
            }

            return (addrLow, addrHigh);
        }

        /// <summary>Gets the bit offset of a byte address inside its 32-bit word.</summary>
        private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
        {
            // Note: bit offset = (baseOffset & 0b11) * 8.
            // Addresses should be always aligned to the integer type,
            // so we don't need to take unaligned addresses into account.
            return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
        }

        /// <summary>
        /// Extracts an 8 or 16-bit value located at <paramref name="bitOffset"/> inside a loaded word,
        /// zero or sign extending it to 32 bits according to <paramref name="size"/>.
        /// </summary>
        private static Operand ExtractSmallInt(
            EmitterContext context,
            LsSize size,
            Operand bitOffset,
            Operand value)
        {
            value = context.ShiftRightU32(value, bitOffset);

            switch (size)
            {
                case LsSize.U8: value = ZeroExtendTo32(context, value, 8); break;
                case LsSize.U16: value = ZeroExtendTo32(context, value, 16); break;
                case LsSize.S8: value = SignExtendTo32(context, value, 8); break;
                case LsSize.S16: value = SignExtendTo32(context, value, 16); break;
            }

            return value;
        }

        /// <summary>
        /// Inserts the low 8 or 16 bits of <paramref name="value"/> into <paramref name="word"/> at
        /// <paramref name="bitOffset"/>. Other sizes return <paramref name="value"/> unchanged.
        /// </summary>
        private static Operand InsertSmallInt(
            EmitterContext context,
            LsSize size,
            Operand bitOffset,
            Operand word,
            Operand value)
        {
            switch (size)
            {
                case LsSize.U8:
                case LsSize.S8:
                    value = context.BitwiseAnd(value, Const(0xff));
                    value = context.BitfieldInsert(word, value, bitOffset, Const(8));
                    break;

                case LsSize.U16:
                case LsSize.S16:
                    value = context.BitwiseAnd(value, Const(0xffff));
                    value = context.BitfieldInsert(word, value, bitOffset, Const(16));
                    break;
            }

            return value;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Move-style instruction emitters: register moves, register to predicate copies,
    /// special register reads and predicated selects.
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>Copies a source register to the destination register.</summary>
        public static void MovR(EmitterContext context)
        {
            InstMovR op = context.GetOp<InstMovR>();

            Operand dest = GetDest(op.Dest);
            Operand source = GetSrcReg(context, op.SrcA);

            context.Copy(dest, source);
        }

        /// <summary>Copies a 20-bit immediate to the destination register.</summary>
        public static void MovI(EmitterContext context)
        {
            InstMovI op = context.GetOp<InstMovI>();

            Operand dest = GetDest(op.Dest);
            Operand source = GetSrcImm(context, op.Imm20);

            context.Copy(dest, source);
        }

        /// <summary>Copies a constant buffer value to the destination register.</summary>
        public static void MovC(EmitterContext context)
        {
            InstMovC op = context.GetOp<InstMovC>();

            Operand dest = GetDest(op.Dest);
            Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            context.Copy(dest, source);
        }

        /// <summary>Copies a 32-bit immediate to the destination register.</summary>
        public static void Mov32i(EmitterContext context)
        {
            InstMov32i op = context.GetOp<InstMov32i>();

            Operand dest = GetDest(op.Dest);
            Operand source = GetSrcImm(context, op.Imm32);

            context.Copy(dest, source);
        }

        /// <summary>Register to predicate copy, with the mask coming from a register.</summary>
        public static void R2pR(EmitterContext context)
        {
            InstR2pR op = context.GetOp<InstR2pR>();

            EmitR2p(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcReg(context, op.SrcB),
                op.ByteSel,
                op.Ccpr);
        }

        /// <summary>Register to predicate copy, with the mask coming from an immediate.</summary>
        public static void R2pI(EmitterContext context)
        {
            InstR2pI op = context.GetOp<InstR2pI>();

            EmitR2p(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcImm(context, Imm20ToSInt(op.Imm20)),
                op.ByteSel,
                op.Ccpr);
        }

        /// <summary>Register to predicate copy, with the mask coming from a constant buffer.</summary>
        public static void R2pC(EmitterContext context)
        {
            InstR2pC op = context.GetOp<InstR2pC>();

            EmitR2p(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcCbuf(context, op.CbufSlot, op.CbufOffset),
                op.ByteSel,
                op.Ccpr);
        }

        /// <summary>
        /// Reads a special register into the destination register.
        /// Special registers without a case below read as zero.
        /// </summary>
        public static void S2r(EmitterContext context)
        {
            InstS2r op = context.GetOp<InstS2r>();

            // Loads one component of a vector input variable.
            Operand LoadComponent(IoVariable ioVariable, int component)
            {
                return context.Load(StorageKind.Input, ioVariable, null, Const(component));
            }

            Operand LoadInvocationInfo()
            {
                ShaderStage stage = context.Config.Stage;

                if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
                {
                    return Const(0);
                }

                // Note: Lowest 8-bits seems to contain some primitive index,
                // but it seems to be NVIDIA implementation specific as it's only used
                // to calculate ISBE offsets, so we can just keep it as zero.

                if (stage == ShaderStage.TessellationControl || stage == ShaderStage.TessellationEvaluation)
                {
                    return context.ShiftLeft(context.Load(StorageKind.Input, IoVariable.PatchVertices), Const(16));
                }

                return Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16);
            }

            // Packs the three thread id components in a single word: X | (Y << 16) | (Z << 26).
            Operand LoadPackedThreadId()
            {
                Operand x = LoadComponent(IoVariable.ThreadId, 0);
                Operand y = LoadComponent(IoVariable.ThreadId, 1);
                Operand z = LoadComponent(IoVariable.ThreadId, 2);

                Operand shiftedY = context.ShiftLeft(y, Const(16));
                Operand shiftedZ = context.ShiftLeft(z, Const(26));

                return context.BitwiseOr(x, context.BitwiseOr(shiftedY, shiftedZ));
            }

            Operand src = op.SReg switch
            {
                SReg.LaneId => context.Load(StorageKind.Input, IoVariable.SubgroupLaneId),
                SReg.InvocationId => context.Load(StorageKind.Input, IoVariable.InvocationId),
                SReg.YDirection => ConstF(1), // TODO: Use value from Y direction GPU register.
                SReg.ThreadKill => context.Config.Stage == ShaderStage.Fragment
                    ? context.Load(StorageKind.Input, IoVariable.ThreadKill)
                    : Const(0),
                SReg.InvocationInfo => LoadInvocationInfo(),
                SReg.TId => LoadPackedThreadId(),
                SReg.TIdX => LoadComponent(IoVariable.ThreadId, 0),
                SReg.TIdY => LoadComponent(IoVariable.ThreadId, 1),
                SReg.TIdZ => LoadComponent(IoVariable.ThreadId, 2),
                SReg.CtaIdX => LoadComponent(IoVariable.CtaId, 0),
                SReg.CtaIdY => LoadComponent(IoVariable.CtaId, 1),
                SReg.CtaIdZ => LoadComponent(IoVariable.CtaId, 2),
                SReg.EqMask => LoadComponent(IoVariable.SubgroupEqMask, 0),
                SReg.LtMask => LoadComponent(IoVariable.SubgroupLtMask, 0),
                SReg.LeMask => LoadComponent(IoVariable.SubgroupLeMask, 0),
                SReg.GtMask => LoadComponent(IoVariable.SubgroupGtMask, 0),
                SReg.GeMask => LoadComponent(IoVariable.SubgroupGeMask, 0),
                _ => Const(0)
            };

            context.Copy(GetDest(op.Dest), src);
        }

        /// <summary>Predicated select between two registers.</summary>
        public static void SelR(EmitterContext context)
        {
            InstSelR op = context.GetOp<InstSelR>();

            EmitSel(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcReg(context, op.SrcB),
                GetPredicate(context, op.SrcPred, op.SrcPredInv),
                op.Dest);
        }

        /// <summary>Predicated select between a register and an immediate.</summary>
        public static void SelI(EmitterContext context)
        {
            InstSelI op = context.GetOp<InstSelI>();

            EmitSel(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcImm(context, Imm20ToSInt(op.Imm20)),
                GetPredicate(context, op.SrcPred, op.SrcPredInv),
                op.Dest);
        }

        /// <summary>Predicated select between a register and a constant buffer value.</summary>
        public static void SelC(EmitterContext context)
        {
            InstSelC op = context.GetOp<InstSelC>();

            EmitSel(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcCbuf(context, op.CbufSlot, op.CbufOffset),
                GetPredicate(context, op.SrcPred, op.SrcPredInv),
                op.Dest);
        }

        /// <summary>
        /// For every predicate register whose bit is set on <paramref name="mask"/>, copies the matching
        /// bit of the byte of <paramref name="value"/> selected by <paramref name="byteSel"/> into it.
        /// </summary>
        private static void EmitR2p(EmitterContext context, Operand value, Operand mask, ByteSel byteSel, bool ccpr)
        {
            if (ccpr)
            {
                // TODO: Support Register to condition code flags copy.
                context.Config.GpuAccessor.Log("R2P.CC not implemented.");
                return;
            }

            Operand IsBitSet(Operand source, int bitIndex)
            {
                return context.ICompareNotEqual(context.BitwiseAnd(source, Const(1 << bitIndex)), Const(0));
            }

            int byteShift = (int)byteSel * 8;

            for (int index = 0; index < RegisterConsts.PredsCount; index++)
            {
                Operand predicate = Register(index, RegisterType.Predicate);

                Operand isSelected = IsBitSet(mask, index);
                Operand newValue = IsBitSet(value, index + byteShift);

                // Predicates not selected by the mask keep their current value.
                context.Copy(predicate, context.ConditionalSelect(isSelected, newValue, predicate));
            }
        }

        /// <summary>Writes <paramref name="srcA"/> to the destination if the predicate is true, <paramref name="srcB"/> otherwise.</summary>
        private static void EmitSel(EmitterContext context, Operand srcA, Operand srcB, Operand srcPred, int rd)
        {
            Operand selected = context.ConditionalSelect(srcPred, srcA, srcB);

            context.Copy(GetDest(rd), selected);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the multi-function unit (MUFU) and the range reduction operator (RRO).
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>Range reduction with a register source.</summary>
        public static void RroR(EmitterContext context)
        {
            InstRroR op = context.GetOp<InstRroR>();

            Operand source = GetSrcReg(context, op.SrcB);

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>Range reduction with an immediate source.</summary>
        public static void RroI(EmitterContext context)
        {
            InstRroI op = context.GetOp<InstRroI>();

            Operand source = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>Range reduction with a constant buffer source.</summary>
        public static void RroC(EmitterContext context)
        {
            InstRroC op = context.GetOp<InstRroC>();

            Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>
        /// Emits the function selected by the MUFU operation, applied to the source after the
        /// absolute/negate modifiers, and writes the optionally saturated result to the destination.
        /// </summary>
        public static void Mufu(EmitterContext context)
        {
            InstMufu op = context.GetOp<InstMufu>();

            Operand input = context.FPAbsNeg(GetSrcReg(context, op.SrcA), op.AbsA, op.NegA);

            // The 64-bit variants take the source as the high word of a double whose low word is zero.
            Operand PackAsHighWord(Operand high)
            {
                return context.PackDouble2x32(OperandHelper.Const(0), high);
            }

            Operand res;

            switch (op.MufuOp)
            {
                case MufuOp.Cos:
                    res = context.FPCosine(input);
                    break;

                case MufuOp.Sin:
                    res = context.FPSine(input);
                    break;

                case MufuOp.Ex2:
                    res = context.FPExponentB2(input);
                    break;

                case MufuOp.Lg2:
                    res = context.FPLogarithmB2(input);
                    break;

                case MufuOp.Rcp:
                    res = context.FPReciprocal(input);
                    break;

                case MufuOp.Rsq:
                    res = context.FPReciprocalSquareRoot(input);
                    break;

                case MufuOp.Rcp64h:
                    res = context.UnpackDouble2x32High(context.FPReciprocal(PackAsHighWord(input), Instruction.FP64));
                    break;

                case MufuOp.Rsq64h:
                    res = context.UnpackDouble2x32High(context.FPReciprocalSquareRoot(PackAsHighWord(input), Instruction.FP64));
                    break;

                case MufuOp.Sqrt:
                    res = context.FPSquareRoot(input);
                    break;

                default:
                    // Unknown operations pass the (modified) source through unchanged.
                    context.Config.GpuAccessor.Log($"Invalid MUFU operation \"{op.MufuOp}\".");
                    res = input;
                    break;
            }

            context.Copy(GetDest(op.Dest), context.FPSaturate(res, op.Sat));
        }

        /// <summary>Emits the range reduction operator as a move with absolute/negate modifiers.</summary>
        private static void EmitRro(EmitterContext context, Operand srcB, int rd, bool absB, bool negB)
        {
            // This is the range reduction operator,
            // we translate it as a simple move, as it
            // should be always followed by a matching
            // MUFU instruction.
            Operand modified = context.FPAbsNeg(srcB, absB, negB);

            context.Copy(GetDest(rd), modified);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>Emitter for the NOP instruction; nothing is emitted.</summary>
        public static void Nop(EmitterContext context)
        {
            // The decoded operation is still fetched, but its value is not needed.
            _ = context.GetOp<InstNop>();

            // No operation.
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the instructions that combine predicate registers with boolean operations.
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>
        /// Combines three (optionally inverted) predicates and writes the result to a general
        /// purpose register, either as is or as 1.0f/0 when the BVal flag is set.
        /// </summary>
        public static void Pset(EmitterContext context)
        {
            InstPset op = context.GetOp<InstPset>();

            Operand firstPred = Register(op.Src2Pred, RegisterType.Predicate);
            Operand first = context.BitwiseNot(firstPred, op.Src2PredInv);

            Operand secondPred = Register(op.Src1Pred, RegisterType.Predicate);
            Operand second = context.BitwiseNot(secondPred, op.Src1PredInv);

            Operand thirdPred = Register(op.SrcPred, RegisterType.Predicate);
            Operand third = context.BitwiseNot(thirdPred, op.SrcPredInv);

            Operand combined = GetPredLogicalOp(
                context,
                op.BoolOpC,
                GetPredLogicalOp(context, op.BoolOpAB, first, second),
                third);

            Operand dest = GetDest(op.Dest);

            if (!op.BVal)
            {
                context.Copy(dest, combined);
                return;
            }

            context.Copy(dest, context.ConditionalSelect(combined, ConstF(1), Const(0)));
        }

        /// <summary>
        /// Combines two (optionally inverted) predicates, then combines both the result and its
        /// complement with a third predicate, writing them to the two destination predicates.
        /// </summary>
        public static void Psetp(EmitterContext context)
        {
            InstPsetp op = context.GetOp<InstPsetp>();

            Operand firstPred = Register(op.Src2Pred, RegisterType.Predicate);
            Operand first = context.BitwiseNot(firstPred, op.Src2PredInv);

            Operand secondPred = Register(op.Src1Pred, RegisterType.Predicate);
            Operand second = context.BitwiseNot(secondPred, op.Src1PredInv);

            Operand result = GetPredLogicalOp(context, op.BoolOpAB, first, second);
            Operand complement = context.BitwiseNot(result);
            Operand third = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            Operand destValue = GetPredLogicalOp(context, op.BoolOpC, result, third);
            Operand destInvValue = GetPredLogicalOp(context, op.BoolOpC, complement, third);

            context.Copy(Register(op.DestPred, RegisterType.Predicate), destValue);
            context.Copy(Register(op.DestPredInv, RegisterType.Predicate), destInvValue);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Emitters for the shift instructions: funnel shifts (SHF), left shifts (SHL) and right shifts (SHR).
    /// </summary>
    static partial class InstEmit
    {
        /// <summary>Funnel shift left with the shift amount coming from a register.</summary>
        public static void ShfLR(EmitterContext context)
        {
            InstShfLR op = context.GetOp<InstShfLR>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcB);
            Operand srcC = GetSrcReg(context, op.SrcC);

            EmitShf(context, op.MaxShift, srcA, srcB, srcC, op.Dest, op.M, left: true, writeCC: op.WriteCC);
        }

        /// <summary>Funnel shift right with the shift amount coming from a register.</summary>
        public static void ShfRR(EmitterContext context)
        {
            InstShfRR op = context.GetOp<InstShfRR>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcB);
            Operand srcC = GetSrcReg(context, op.SrcC);

            EmitShf(context, op.MaxShift, srcA, srcB, srcC, op.Dest, op.M, left: false, writeCC: op.WriteCC);
        }

        /// <summary>Funnel shift left with the shift amount coming from a 6-bit immediate.</summary>
        public static void ShfLI(EmitterContext context)
        {
            InstShfLI op = context.GetOp<InstShfLI>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = Const(op.Imm6);
            Operand srcC = GetSrcReg(context, op.SrcC);

            EmitShf(context, op.MaxShift, srcA, srcB, srcC, op.Dest, op.M, left: true, writeCC: op.WriteCC);
        }

        /// <summary>Funnel shift right with the shift amount coming from a 6-bit immediate.</summary>
        public static void ShfRI(EmitterContext context)
        {
            InstShfRI op = context.GetOp<InstShfRI>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = Const(op.Imm6);
            Operand srcC = GetSrcReg(context, op.SrcC);

            EmitShf(context, op.MaxShift, srcA, srcB, srcC, op.Dest, op.M, left: false, writeCC: op.WriteCC);
        }

        /// <summary>Shift left with the shift amount coming from a register.</summary>
        public static void ShlR(EmitterContext context)
        {
            InstShlR op = context.GetOp<InstShlR>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcB);

            EmitShl(context, srcA, srcB, op.Dest, op.M);
        }

        /// <summary>Shift left with the shift amount coming from an immediate.</summary>
        public static void ShlI(EmitterContext context)
        {
            InstShlI op = context.GetOp<InstShlI>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitShl(context, srcA, srcB, op.Dest, op.M);
        }

        /// <summary>Shift left with the shift amount coming from a constant buffer.</summary>
        public static void ShlC(EmitterContext context)
        {
            InstShlC op = context.GetOp<InstShlC>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitShl(context, srcA, srcB, op.Dest, op.M);
        }

        /// <summary>Shift right with the shift amount coming from a register.</summary>
        public static void ShrR(EmitterContext context)
        {
            InstShrR op = context.GetOp<InstShrR>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcB);

            EmitShr(context, srcA, srcB, op.Dest, op.M, op.Brev, op.Signed);
        }

        /// <summary>Shift right with the shift amount coming from an immediate.</summary>
        public static void ShrI(EmitterContext context)
        {
            InstShrI op = context.GetOp<InstShrI>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitShr(context, srcA, srcB, op.Dest, op.M, op.Brev, op.Signed);
        }

        /// <summary>Shift right with the shift amount coming from a constant buffer.</summary>
        public static void ShrC(EmitterContext context)
        {
            InstShrC op = context.GetOp<InstShrC>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitShr(context, srcA, srcB, op.Dest, op.M, op.Brev, op.Signed);
        }

        /// <summary>
        /// Emits a funnel shift, where <paramref name="srcA"/> and <paramref name="srcC"/> are the two
        /// words being shifted and <paramref name="srcB"/> is the shift amount.
        /// </summary>
        /// <remarks>
        /// With <paramref name="mask"/> set the shift amount wraps at the maximum shift (32 or 64),
        /// otherwise amounts at or above the maximum produce zero.
        /// </remarks>
        private static void EmitShf(
            EmitterContext context,
            MaxShift maxShift,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool mask,
            bool left,
            bool writeCC)
        {
            bool signedShift = maxShift == MaxShift.S64;
            bool isLongShift = signedShift || maxShift == MaxShift.U64;
            int maxShiftConst = isLongShift ? 64 : 32;

            Operand shift = mask
                ? context.BitwiseAnd(srcB, Const(maxShiftConst - 1))
                : srcB;

            Operand ShiftLeftFunnel()
            {
                // combined = (C << B) | (A >> (32 - B))
                Operand upper = context.ShiftLeft(srcC, shift);
                Operand carriedIn = context.ShiftRightU32(srcA, context.ISubtract(Const(32), shift));
                Operand combined = context.BitwiseOr(upper, carriedIn);

                if (!isLongShift)
                {
                    return combined;
                }

                // B >= 32 ? A << (B - 32) : combined
                Operand lowerShift = context.ShiftLeft(srcA, context.ISubtract(shift, Const(32)));
                Operand shiftGreaterThan31 = context.ICompareGreaterOrEqualUnsigned(shift, Const(32));

                return context.ConditionalSelect(shiftGreaterThan31, lowerShift, combined);
            }

            Operand ShiftRightFunnel()
            {
                // combined = (A >> B) | (C << (32 - B))
                Operand lower = context.ShiftRightU32(srcA, shift);
                Operand carriedIn = context.ShiftLeft(srcC, context.ISubtract(Const(32), shift));
                Operand combined = context.BitwiseOr(lower, carriedIn);

                if (!isLongShift)
                {
                    return combined;
                }

                // B >= 32 ? C >> (B - 32) : combined
                Operand excess = context.ISubtract(shift, Const(32));
                Operand upperShift = signedShift
                    ? context.ShiftRightS32(srcC, excess)
                    : context.ShiftRightU32(srcC, excess);
                Operand shiftGreaterThan31 = context.ICompareGreaterOrEqualUnsigned(shift, Const(32));

                return context.ConditionalSelect(shiftGreaterThan31, upperShift, combined);
            }

            Operand res = left ? ShiftLeftFunnel() : ShiftRightFunnel();

            if (!mask)
            {
                // Clamped shift value.
                Operand isLessThanMax = context.ICompareLessUnsigned(shift, Const(maxShiftConst));

                res = context.ConditionalSelect(isLessThanMax, res, Const(0));
            }

            context.Copy(GetDest(rd), res);

            if (writeCC)
            {
                InstEmitAluHelper.SetZnFlags(context, res, writeCC);
            }

            // TODO: X.
        }

        /// <summary>
        /// Emits a 32-bit shift left. With <paramref name="mask"/> set the shift amount wraps at 32,
        /// otherwise amounts of 32 or more produce zero.
        /// </summary>
        private static void EmitShl(EmitterContext context, Operand srcA, Operand srcB, int rd, bool mask)
        {
            Operand res;

            if (mask)
            {
                res = context.ShiftLeft(srcA, context.BitwiseAnd(srcB, Const(0x1f)));
            }
            else
            {
                Operand shifted = context.ShiftLeft(srcA, srcB);

                // Clamped shift value.
                Operand isLessThan32 = context.ICompareLessUnsigned(srcB, Const(32));

                res = context.ConditionalSelect(isLessThan32, shifted, Const(0));
            }

            // TODO: X, CC.

            context.Copy(GetDest(rd), res);
        }

        /// <summary>
        /// Emits a 32-bit logical or arithmetic shift right, optionally reversing the bits of the
        /// source first. With <paramref name="mask"/> set the shift amount wraps at 32, otherwise
        /// amounts of 32 or more produce zero (or the sign bit replicated, for signed shifts).
        /// </summary>
        private static void EmitShr(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            bool mask,
            bool bitReverse,
            bool isSigned)
        {
            Operand source = bitReverse ? context.BitfieldReverse(srcA) : srcA;
            Operand shift = mask ? context.BitwiseAnd(srcB, Const(0x1f)) : srcB;

            Operand res = isSigned
                ? context.ShiftRightS32(source, shift)
                : context.ShiftRightU32(source, shift);

            if (!mask)
            {
                // Clamped shift value.
                Operand resShiftBy32 = isSigned
                    ? context.ShiftRightS32(source, Const(31))
                    : Const(0);

                Operand isLessThan32 = context.ICompareLessUnsigned(shift, Const(32));

                res = context.ConditionalSelect(isLessThan32, res, resShiftBy32);
            }

            // TODO: X, CC.

            context.Copy(GetDest(rd), res);
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs new file mode 100644 index 00000000..3d94b893 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs @@ -0,0 +1,796 @@ +using Ryujinx.Graphics.Shader.Decoders; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.Numerics; + +using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper; +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Instructions +{ + static partial class InstEmit + { + public static void SuatomB(EmitterContext context) + { + InstSuatomB op = context.GetOp<InstSuatomB>(); + + EmitSuatom( + context, + op.Dim, + op.Op, + op.Size, + 0, + op.SrcA, + op.SrcB, + op.SrcC, + op.Dest, + op.Ba, + isBindless: true, + compareAndSwap: false); + } + + public static void Suatom(EmitterContext context) + { + InstSuatom op = context.GetOp<InstSuatom>(); + + EmitSuatom( + context, + op.Dim, + op.Op, + op.Size, + op.TidB, + op.SrcA, + op.SrcB, + 0, + op.Dest, + op.Ba, + isBindless: false, + compareAndSwap: false); + } + + public static void SuatomB2(EmitterContext context) + { + InstSuatomB2 op = context.GetOp<InstSuatomB2>(); + + EmitSuatom( + context, + op.Dim, + op.Op, + op.Size, + 0, + op.SrcA, + op.SrcB, + op.SrcC, + op.Dest, + op.Ba, + isBindless: true, + compareAndSwap: false); + } + + public static void SuatomCasB(EmitterContext context) + { + InstSuatomCasB op = context.GetOp<InstSuatomCasB>(); + + EmitSuatom( + context, + op.Dim, + 0, + op.Size, + 0, + op.SrcA, + op.SrcB, + op.SrcC, + op.Dest, + op.Ba, + isBindless: true, + compareAndSwap: true); + } + + public static void SuatomCas(EmitterContext context) + { + InstSuatomCas op = context.GetOp<InstSuatomCas>(); + + EmitSuatom( + context, 
+ op.Dim, + 0, + op.Size, + op.TidB, + op.SrcA, + op.SrcB, + 0, + op.Dest, + op.Ba, + isBindless: false, + compareAndSwap: true); + } + + public static void SuldDB(EmitterContext context) + { + InstSuldDB op = context.GetOp<InstSuldDB>(); + + EmitSuld(context, op.CacheOp, op.Dim, op.Size, 0, 0, op.SrcA, op.Dest, op.SrcC, useComponents: false, op.Ba, isBindless: true); + } + + public static void SuldD(EmitterContext context) + { + InstSuldD op = context.GetOp<InstSuldD>(); + + EmitSuld(context, op.CacheOp, op.Dim, op.Size, op.TidB, 0, op.SrcA, op.Dest, 0, useComponents: false, op.Ba, isBindless: false); + } + + public static void SuldB(EmitterContext context) + { + InstSuldB op = context.GetOp<InstSuldB>(); + + EmitSuld(context, op.CacheOp, op.Dim, 0, 0, op.Rgba, op.SrcA, op.Dest, op.SrcC, useComponents: true, false, isBindless: true); + } + + public static void Suld(EmitterContext context) + { + InstSuld op = context.GetOp<InstSuld>(); + + EmitSuld(context, op.CacheOp, op.Dim, 0, op.TidB, op.Rgba, op.SrcA, op.Dest, 0, useComponents: true, false, isBindless: false); + } + + public static void SuredB(EmitterContext context) + { + InstSuredB op = context.GetOp<InstSuredB>(); + + EmitSured(context, op.Dim, op.Op, op.Size, 0, op.SrcA, op.Dest, op.SrcC, op.Ba, isBindless: true); + } + + public static void Sured(EmitterContext context) + { + InstSured op = context.GetOp<InstSured>(); + + EmitSured(context, op.Dim, op.Op, op.Size, op.TidB, op.SrcA, op.Dest, 0, op.Ba, isBindless: false); + } + + public static void SustDB(EmitterContext context) + { + InstSustDB op = context.GetOp<InstSustDB>(); + + EmitSust(context, op.CacheOp, op.Dim, op.Size, 0, 0, op.SrcA, op.Dest, op.SrcC, useComponents: false, op.Ba, isBindless: true); + } + + public static void SustD(EmitterContext context) + { + InstSustD op = context.GetOp<InstSustD>(); + + EmitSust(context, op.CacheOp, op.Dim, op.Size, op.TidB, 0, op.SrcA, op.Dest, 0, useComponents: false, op.Ba, isBindless: false); + } + + public 
static void SustB(EmitterContext context) + { + /* Bindless image store driven by the op.Rgba component mask; op.SrcC supplies the handle register. */ + InstSustB op = context.GetOp<InstSustB>(); + + EmitSust(context, op.CacheOp, op.Dim, 0, 0, op.Rgba, op.SrcA, op.Dest, op.SrcC, useComponents: true, false, isBindless: true); + } + + /* Bound image store driven by the op.Rgba component mask; op.TidB is passed as the immediate handle. */ + public static void Sust(EmitterContext context) + { + InstSust op = context.GetOp<InstSust>(); + + EmitSust(context, op.CacheOp, op.Dim, 0, op.TidB, op.Rgba, op.SrcA, op.Dest, 0, useComponents: true, false, isBindless: false); + } + + /* Emits an Instruction.ImageAtomic texture operation. Sources are, in order: the bindless handle read from srcC (bindless only), the coordinates read from consecutive registers starting at srcA, the array index (array types only), and one value (two for compare-and-swap) read from consecutive registers starting at srcB. The result goes to dest unless dest is the zero register. */ + private static void EmitSuatom( + EmitterContext context, + SuDim dimensions, + SuatomOp atomicOp, + SuatomSize size, + int imm, + int srcA, + int srcB, + int srcC, + int dest, + bool byteAddress, + bool isBindless, + bool compareAndSwap) + { + SamplerType type = ConvertSamplerType(dimensions); + + if (type == SamplerType.None) + { + context.Config.GpuAccessor.Log("Invalid image atomic sampler type."); + return; + } + + /* Each call reads the next register of the srcA sequence; once the index is past the zero register a constant 0 is returned instead. */ + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + /* Same as Ra, for the srcB sequence. */ + Operand Rb() + { + if (srcB > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcB++, RegisterType.Gpr)); + } + + Operand destOperand = dest != RegisterConsts.RegisterZeroIndex ? Register(dest, RegisterType.Gpr) : null; + + List<Operand> sourcesList = new List<Operand>(); + + if (isBindless) + { + sourcesList.Add(context.Copy(GetSrcReg(context, srcC))); + } + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + /* 1D images are promoted to 2D with a constant 0 as the second coordinate. */ + if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D) + { + sourcesList.Add(Const(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + if (type.HasFlag(SamplerType.Array)) + { + sourcesList.Add(Ra()); + + /* The flag is already set whenever this branch is taken, so this assignment does not change type. */ + type |= SamplerType.Array; + } + + if (byteAddress) + { + int xIndex = isBindless ? 
1 : 0; + + sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size))); + } + + // TODO: FP and 64-bit formats. + TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64 + ? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm)) + : GetTextureFormat(size); + + if (compareAndSwap) + { + sourcesList.Add(Rb()); + } + + sourcesList.Add(Rb()); + + Operand[] sources = sourcesList.ToArray(); + + TextureFlags flags = compareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(atomicOp); + + if (isBindless) + { + flags |= TextureFlags.Bindless; + } + + TextureOperation operation = context.CreateTextureOperation( + Instruction.ImageAtomic, + type, + format, + flags, + imm, + 0, + new[] { destOperand }, + sources); + + context.Add(operation); + } + + /* Emits an Instruction.ImageLoad texture operation. Sources are the bindless handle read from srcC (bindless only), the coordinates read from consecutive registers starting at srcA and the array index (array types only). Destinations are consecutive registers starting at srcB, stopping before the zero register. With useComponents the destination count follows componentMask and, for bound images, the format is queried from the config; otherwise the component count and format are derived from size. */ + private static void EmitSuld( + EmitterContext context, + CacheOpLd cacheOp, + SuDim dimensions, + SuSize size, + int imm, + SuRgba componentMask, + int srcA, + int srcB, + int srcC, + bool useComponents, + bool byteAddress, + bool isBindless) + { + context.Config.SetUsedFeature(FeatureFlags.IntegerSampling); + + SamplerType type = ConvertSamplerType(dimensions); + + if (type == SamplerType.None) + { + context.Config.GpuAccessor.Log("Invalid image load sampler type."); + return; + } + + /* Each call reads the next register of the srcA sequence; once the index is past the zero register a constant 0 is returned instead. */ + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + List<Operand> sourcesList = new List<Operand>(); + + if (isBindless) + { + sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr))); + } + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + /* 1D images are promoted to 2D with a constant 0 as the second coordinate. */ + if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D) + { + sourcesList.Add(Const(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + if (type.HasFlag(SamplerType.Array)) + { + 
sourcesList.Add(Ra()); + } + + Operand[] sources = sourcesList.ToArray(); + + int handle = imm; + + TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None; + + if (cacheOp == CacheOpLd.Cg) + { + flags |= TextureFlags.Coherent; + } + + if (useComponents) + { + Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)]; + + int outputIndex = 0; + + for (int i = 0; i < dests.Length; i++) + { + /* Stop before the zero register; the array is trimmed to the registers actually used below. */ + if (srcB + i >= RegisterConsts.RegisterZeroIndex) + { + break; + } + + dests[outputIndex++] = Register(srcB + i, RegisterType.Gpr); + } + + if (outputIndex != dests.Length) + { + Array.Resize(ref dests, outputIndex); + } + + TextureOperation operation = context.CreateTextureOperation( + Instruction.ImageLoad, + type, + flags, + handle, + (int)componentMask, + dests, + sources); + + if (!isBindless) + { + operation.Format = context.Config.GetTextureFormat(handle); + } + + context.Add(operation); + } + else + { + if (byteAddress) + { + int xIndex = isBindless ? 1 : 0; + + sources[xIndex] = context.ShiftRightS32(sources[xIndex], Const(GetComponentSizeInBytesLog2(size))); + } + + int components = GetComponents(size); + int compMask = (1 << components) - 1; + + Operand[] dests = new Operand[components]; + + int outputIndex = 0; + + for (int i = 0; i < dests.Length; i++) + { + if (srcB + i >= RegisterConsts.RegisterZeroIndex) + { + break; + } + + dests[outputIndex++] = Register(srcB + i, RegisterType.Gpr); + } + + if (outputIndex != dests.Length) + { + Array.Resize(ref dests, outputIndex); + } + + TextureOperation operation = context.CreateTextureOperation( + Instruction.ImageLoad, + type, + GetTextureFormat(size), + flags, + handle, + compMask, + dests, + sources); + + context.Add(operation); + + if (dests.Length == 0) + { + /* The destination is the zero register, so dests is empty and there is no loaded value to extend; indexing dests[0] below would throw. */ + return; + } + + switch (size) + { + case SuSize.U8: context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 8)); break; + case SuSize.U16: context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 16)); break; + case SuSize.S8: context.Copy(dests[0], 
SignExtendTo32(context, dests[0], 8)); break; + case SuSize.S16: context.Copy(dests[0], SignExtendTo32(context, dests[0], 16)); break; + } + } + } + + /* Emits an Instruction.ImageAtomic texture operation with no destination (reduction). Sources are, in order: the bindless handle read from srcC (bindless only), the coordinates read from consecutive registers starting at srcA, the array index (array types only), and one value read from srcB. */ + private static void EmitSured( + EmitterContext context, + SuDim dimensions, + RedOp atomicOp, + SuatomSize size, + int imm, + int srcA, + int srcB, + int srcC, + bool byteAddress, + bool isBindless) + { + SamplerType type = ConvertSamplerType(dimensions); + + if (type == SamplerType.None) + { + context.Config.GpuAccessor.Log("Invalid image reduction sampler type."); + return; + } + + /* Each call reads the next register of the srcA sequence; once the index is past the zero register a constant 0 is returned instead. */ + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + /* Same as Ra, for the srcB sequence. */ + Operand Rb() + { + if (srcB > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcB++, RegisterType.Gpr)); + } + + List<Operand> sourcesList = new List<Operand>(); + + if (isBindless) + { + sourcesList.Add(context.Copy(GetSrcReg(context, srcC))); + } + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + /* 1D images are promoted to 2D with a constant 0 as the second coordinate. */ + if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D) + { + sourcesList.Add(Const(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + if (type.HasFlag(SamplerType.Array)) + { + sourcesList.Add(Ra()); + + /* The flag is already set whenever this branch is taken, so this assignment does not change type. */ + type |= SamplerType.Array; + } + + if (byteAddress) + { + /* The X coordinate follows the handle when bindless; it is shifted right by log2 of the element size. */ + int xIndex = isBindless ? 1 : 0; + + sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size))); + } + + // TODO: FP and 64-bit formats. + TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64 + ? (isBindless ? 
TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm)) + : GetTextureFormat(size); + + sourcesList.Add(Rb()); + + Operand[] sources = sourcesList.ToArray(); + + TextureFlags flags = GetAtomicOpFlags((SuatomOp)atomicOp); + + if (isBindless) + { + flags |= TextureFlags.Bindless; + } + + TextureOperation operation = context.CreateTextureOperation( + Instruction.ImageAtomic, + type, + format, + flags, + imm, + 0, + null, + sources); + + context.Add(operation); + } + + /* Emits an Instruction.ImageStore texture operation. Sources are, in order: the bindless handle read from srcC (bindless only), the coordinates read from consecutive registers starting at srcA, the array index (array types only), and the values to store read from consecutive registers starting at srcB: one per set bit of componentMask when useComponents is true, otherwise GetComponents(size) of them. */ + private static void EmitSust( + EmitterContext context, + CacheOpSt cacheOp, + SuDim dimensions, + SuSize size, + int imm, + SuRgba componentMask, + int srcA, + int srcB, + int srcC, + bool useComponents, + bool byteAddress, + bool isBindless) + { + SamplerType type = ConvertSamplerType(dimensions); + + if (type == SamplerType.None) + { + context.Config.GpuAccessor.Log("Invalid image store sampler type."); + return; + } + + /* Each call reads the next register of the srcA sequence; once the index is past the zero register a constant 0 is returned instead. */ + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + /* Same as Ra, for the srcB sequence. */ + Operand Rb() + { + if (srcB > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcB++, RegisterType.Gpr)); + } + + List<Operand> sourcesList = new List<Operand>(); + + if (isBindless) + { + sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr))); + } + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + /* 1D images are promoted to 2D with a constant 0 as the second coordinate. */ + if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D) + { + sourcesList.Add(Const(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + if (type.HasFlag(SamplerType.Array)) + { + sourcesList.Add(Ra()); + } + + TextureFormat format = TextureFormat.Unknown; + + if (useComponents) + { + for (int compMask = (int)componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++) + { + if ((compMask & 1) != 0) + { + sourcesList.Add(Rb()); + } + 
} + + if (!isBindless) + { + format = context.Config.GetTextureFormat(imm); + } + } + else + { + if (byteAddress) + { + /* The X coordinate follows the handle when bindless; it is shifted right by log2 of the element size. */ + int xIndex = isBindless ? 1 : 0; + + sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size))); + } + + int components = GetComponents(size); + + for (int compIndex = 0; compIndex < components; compIndex++) + { + sourcesList.Add(Rb()); + } + + format = GetTextureFormat(size); + } + + Operand[] sources = sourcesList.ToArray(); + + int handle = imm; + + TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None; + + if (cacheOp == CacheOpSt.Cg) + { + flags |= TextureFlags.Coherent; + } + + TextureOperation operation = context.CreateTextureOperation( + Instruction.ImageStore, + type, + format, + flags, + handle, + 0, + null, + sources); + + context.Add(operation); + } + + /* Returns the right-shift amount the atomic/reduction emitters apply to a byte-addressed X coordinate: 3 for the 64-bit sizes, 2 for every other value including unlisted ones. */ + private static int GetComponentSizeInBytesLog2(SuatomSize size) + { + return size switch + { + SuatomSize.U32 => 2, + SuatomSize.S32 => 2, + SuatomSize.U64 => 3, + SuatomSize.F32FtzRn => 2, + SuatomSize.F16x2FtzRn => 2, + SuatomSize.S64 => 3, + SuatomSize.Sd32 => 2, + SuatomSize.Sd64 => 3, + _ => 2 + }; + } + + /* Returns the image format used for an atomic size; the 64-bit sizes map to R32G32Uint and unlisted values fall back to R32Uint. */ + private static TextureFormat GetTextureFormat(SuatomSize size) + { + return size switch + { + SuatomSize.U32 => TextureFormat.R32Uint, + SuatomSize.S32 => TextureFormat.R32Sint, + SuatomSize.U64 => TextureFormat.R32G32Uint, + SuatomSize.F32FtzRn => TextureFormat.R32Float, + SuatomSize.F16x2FtzRn => TextureFormat.R16G16Float, + SuatomSize.S64 => TextureFormat.R32G32Uint, + SuatomSize.Sd32 => TextureFormat.R32Uint, + SuatomSize.Sd64 => TextureFormat.R32G32Uint, + _ => TextureFormat.R32Uint + }; + } + + /* Maps an atomic operation to the matching TextureFlags value; unlisted operations fall back to TextureFlags.Add. */ + private static TextureFlags GetAtomicOpFlags(SuatomOp op) + { + return op switch + { + SuatomOp.Add => TextureFlags.Add, + SuatomOp.Min => TextureFlags.Minimum, + SuatomOp.Max => TextureFlags.Maximum, + SuatomOp.Inc => TextureFlags.Increment, + SuatomOp.Dec => TextureFlags.Decrement, + SuatomOp.And => 
TextureFlags.BitwiseAnd, + SuatomOp.Or => TextureFlags.BitwiseOr, + SuatomOp.Xor => TextureFlags.BitwiseXor, + SuatomOp.Exch => TextureFlags.Swap, + _ => TextureFlags.Add + }; + } + + /* Returns how many components a sized load or store transfers: 2 for B64, 4 for B128/UB128 and 1 for everything else. */ + private static int GetComponents(SuSize size) + { + return size switch + { + SuSize.B64 => 2, + SuSize.B128 => 4, + SuSize.UB128 => 4, + _ => 1 + }; + } + + /* Returns the right-shift amount the sized load/store emitters apply to a byte-addressed X coordinate; unlisted values map to 2. */ + private static int GetComponentSizeInBytesLog2(SuSize size) + { + return size switch + { + SuSize.U8 => 0, + SuSize.S8 => 0, + SuSize.U16 => 1, + SuSize.S16 => 1, + SuSize.B32 => 2, + SuSize.B64 => 3, + SuSize.B128 => 4, + SuSize.UB128 => 4, + _ => 2 + }; + } + + /* Returns the image format used for a sized load or store; unlisted values fall back to R32Uint. */ + private static TextureFormat GetTextureFormat(SuSize size) + { + return size switch + { + SuSize.U8 => TextureFormat.R8Uint, + SuSize.S8 => TextureFormat.R8Sint, + SuSize.U16 => TextureFormat.R16Uint, + SuSize.S16 => TextureFormat.R16Sint, + SuSize.B32 => TextureFormat.R32Uint, + SuSize.B64 => TextureFormat.R32G32Uint, + SuSize.B128 => TextureFormat.R32G32B32A32Uint, + SuSize.UB128 => TextureFormat.R32G32B32A32Uint, + _ => TextureFormat.R32Uint + }; + } + + /* Converts a surface dimension to a SamplerType. Unlisted dimensions yield SamplerType.None, which the Emit* callers in this file log as invalid before returning. */ + private static SamplerType ConvertSamplerType(SuDim target) + { + return target switch + { + SuDim._1d => SamplerType.Texture1D, + SuDim._1dBuffer => SamplerType.TextureBuffer, + SuDim._1dArray => SamplerType.Texture1D | SamplerType.Array, + SuDim._2d => SamplerType.Texture2D, + SuDim._2dArray => SamplerType.Texture2D | SamplerType.Array, + SuDim._3d => SamplerType.Texture3D, + _ => SamplerType.None + }; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs new file mode 100644 index 00000000..caa9a775 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs @@ -0,0 +1,1312 @@ +using Ryujinx.Graphics.Shader.Decoders; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.Numerics; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Instructions +{ + static partial class InstEmit + { + /* Component masks used by EmitTexs: the row is 0 when the second destination is the zero register and 1 otherwise, the column is the instruction's write mask. */ + private static readonly int[,] _maskLut = new int[,] + { + { 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b1001, 0b1010, 0b1100 }, + { 0b0111, 0b1011, 0b1101, 0b1110, 0b1111, 0b0000, 0b0000, 0b0000 } + }; + + /* When true, the emitters in this class turn 1D sampler types into 2D ones and append a constant 0 as the second coordinate. */ + public const bool Sample1DAs2D = true; + + /* Selects which instruction family EmitTexs is emitting for. */ + private enum TexsType + { + Texs, + Tlds, + Tld4s + } + + /* Bound texture sample: op.TidB is passed as the immediate handle; multisample is always off. */ + public static void Tex(EmitterContext context) + { + InstTex op = context.GetOp<InstTex>(); + + EmitTex(context, TextureFlags.None, op.Dim, op.Lod, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, false, op.Dc, op.Aoffi); + } + + /* Bindless texture sample: same as Tex but with TextureFlags.Bindless and an immediate of 0. */ + public static void TexB(EmitterContext context) + { + InstTexB op = context.GetOp<InstTexB>(); + + EmitTex(context, TextureFlags.Bindless, op.Dim, op.Lodb, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, false, op.Dc, op.Aoffib); + } + + /* TEXS with 32-bit destinations (isF16: false). */ + public static void Texs(EmitterContext context) + { + InstTexs op = context.GetOp<InstTexs>(); + + EmitTexs(context, TexsType.Texs, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: false); + } + + /* TEXS whose results are packed as half pairs into op.Dest and op.Dest2 (isF16: true). */ + public static void TexsF16(EmitterContext context) + { + InstTexs op = context.GetOp<InstTexs>(); + + EmitTexs(context, TexsType.Texs, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: true); + } + + /* Bound texel fetch: integer coordinates, with Lod.Ll or Lod.Lz chosen from op.Lod. */ + public static void Tld(EmitterContext context) + { + InstTld op = context.GetOp<InstTld>(); + + 
context.Config.SetUsedFeature(FeatureFlags.IntegerSampling); + + var lod = op.Lod ? Lod.Ll : Lod.Lz; + + EmitTex(context, TextureFlags.IntCoords, op.Dim, lod, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Ms, false, op.Toff); + } + + /* Bindless texel fetch: integer coordinates plus TextureFlags.Bindless, with an immediate of 0. */ + public static void TldB(EmitterContext context) + { + InstTldB op = context.GetOp<InstTldB>(); + + context.Config.SetUsedFeature(FeatureFlags.IntegerSampling); + + var flags = TextureFlags.IntCoords | TextureFlags.Bindless; + var lod = op.Lod ? Lod.Ll : Lod.Lz; + + EmitTex(context, flags, op.Dim, lod, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Ms, false, op.Toff); + } + + /* TLDS with 32-bit destinations (isF16: false). */ + public static void Tlds(EmitterContext context) + { + InstTlds op = context.GetOp<InstTlds>(); + + EmitTexs(context, TexsType.Tlds, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: false); + } + + /* TLDS whose results are packed as half pairs into op.Dest and op.Dest2 (isF16: true). */ + public static void TldsF16(EmitterContext context) + { + InstTlds op = context.GetOp<InstTlds>(); + + EmitTexs(context, TexsType.Tlds, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: true); + } + + /* Bound texture gather: op.TidB is passed as the immediate handle. */ + public static void Tld4(EmitterContext context) + { + InstTld4 op = context.GetOp<InstTld4>(); + + EmitTld4(context, op.Dim, op.TexComp, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Toff, op.Dc, isBindless: false); + } + + /* Bindless texture gather: immediate of 0, handle read by EmitTld4 from the srcB sequence. */ + public static void Tld4B(EmitterContext context) + { + InstTld4B op = context.GetOp<InstTld4B>(); + + EmitTld4(context, op.Dim, op.TexComp, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Toff, op.Dc, isBindless: true); + } + + /* TLD4S with 32-bit destinations; the write mask is the fixed value 4, which EmitTexs uses as the _maskLut column. */ + public static void Tld4s(EmitterContext context) + { + InstTld4s op = context.GetOp<InstTld4s>(); + + EmitTexs(context, TexsType.Tld4s, op.TidB, 4, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: false); + } + + /* TLD4S whose results are packed as half pairs into op.Dest and op.Dest2 (isF16: true). */ + public static void Tld4sF16(EmitterContext context) + { + InstTld4s op = context.GetOp<InstTld4s>(); + + EmitTexs(context, TexsType.Tld4s, op.TidB, 4, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: true); + } + + /* Bound LOD query: op.TidB is passed as the immediate handle. */ + public static void Tmml(EmitterContext context) + { + InstTmml op = context.GetOp<InstTmml>(); 
+ + EmitTmml(context, op.Dim, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, isBindless: false); + } + + /* Bindless LOD query: immediate of 0, handle read by EmitTmml from the srcB sequence. */ + public static void TmmlB(EmitterContext context) + { + InstTmmlB op = context.GetOp<InstTmmlB>(); + + EmitTmml(context, op.Dim, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, isBindless: true); + } + + /* Bound texture sample with explicit derivatives: op.TidB is passed as the immediate handle. */ + public static void Txd(EmitterContext context) + { + InstTxd op = context.GetOp<InstTxd>(); + + EmitTxd(context, op.Dim, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Toff, isBindless: false); + } + + /* Bindless texture sample with explicit derivatives: immediate of 0. */ + public static void TxdB(EmitterContext context) + { + InstTxdB op = context.GetOp<InstTxdB>(); + + EmitTxd(context, op.Dim, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Toff, isBindless: true); + } + + /* Bound texture query: op.TidB is passed as the immediate handle. */ + public static void Txq(EmitterContext context) + { + InstTxq op = context.GetOp<InstTxq>(); + + EmitTxq(context, op.TexQuery, op.TidB, op.WMask, op.SrcA, op.Dest, isBindless: false); + } + + /* Bindless texture query: immediate of 0. */ + public static void TxqB(EmitterContext context) + { + InstTxqB op = context.GetOp<InstTxqB>(); + + EmitTxq(context, op.TexQuery, 0, op.WMask, op.SrcA, op.Dest, isBindless: true); + } + + /* Emits an Instruction.TextureSample operation for the TEX/TLD family. Operands are read from two register sequences starting at raIndex and rbIndex; results go to consecutive registers starting at rdIndex, one per set bit of componentMask, stopping before the zero register. Nothing is emitted when rdIndex is the zero register. */ + private static void EmitTex( + EmitterContext context, + TextureFlags flags, + TexDim dimensions, + Lod lodMode, + int imm, + int componentMask, + int raIndex, + int rbIndex, + int rdIndex, + bool isMultisample, + bool hasDepthCompare, + bool hasOffset) + { + if (rdIndex == RegisterConsts.RegisterZeroIndex) + { + return; + } + + /* Each call reads the next register of the raIndex sequence; once the index is past the zero register a constant 0 is returned instead. */ + Operand Ra() + { + if (raIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(raIndex++, RegisterType.Gpr)); + } + + /* Same as Ra, for the rbIndex sequence. */ + Operand Rb() + { + if (rbIndex > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(rbIndex++, RegisterType.Gpr)); + } + + SamplerType type = ConvertSamplerType(dimensions); + + bool isArray = type.HasFlag(SamplerType.Array); + bool isBindless = flags.HasFlag(TextureFlags.Bindless); + + Operand arrayIndex = isArray ? 
Ra() : null; + + List<Operand> sourcesList = new List<Operand>(); + + if (isBindless) + { + sourcesList.Add(Rb()); + } + + bool hasLod = lodMode > Lod.Lz; + + if (type == SamplerType.Texture1D && (flags & ~TextureFlags.Bindless) == TextureFlags.IntCoords && !( + hasLod || + hasDepthCompare || + hasOffset || + isArray || + isMultisample)) + { + // For bindless, we don't have any way to know the texture type, + // so we assume it's texture buffer when the sampler type is 1D, since that's more common. + bool isTypeBuffer = isBindless || context.Config.GpuAccessor.QuerySamplerType(imm) == SamplerType.TextureBuffer; + if (isTypeBuffer) + { + type = SamplerType.TextureBuffer; + } + } + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + bool is1DTo2D = false; + + if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D) + { + sourcesList.Add(ConstF(0)); + + type = SamplerType.Texture2D | (type & SamplerType.Array); + is1DTo2D = true; + } + + if (isArray) + { + sourcesList.Add(arrayIndex); + } + + Operand lodValue = hasLod ? Rb() : ConstF(0); + + Operand packedOffs = hasOffset ? 
Rb() : null; + + if (hasDepthCompare) + { + sourcesList.Add(Rb()); + + type |= SamplerType.Shadow; + } + + if ((lodMode == Lod.Lz || + lodMode == Lod.Ll || + lodMode == Lod.Lla) && !isMultisample && type != SamplerType.TextureBuffer) + { + sourcesList.Add(lodValue); + + flags |= TextureFlags.LodLevel; + } + + if (hasOffset) + { + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(context.BitfieldExtractS32(packedOffs, Const(index * 4), Const(4))); + } + + if (is1DTo2D) + { + sourcesList.Add(Const(0)); + } + + flags |= TextureFlags.Offset; + } + + if (lodMode == Lod.Lb || lodMode == Lod.Lba) + { + sourcesList.Add(lodValue); + + flags |= TextureFlags.LodBias; + } + + if (isMultisample) + { + sourcesList.Add(Rb()); + + type |= SamplerType.Multisample; + } + + Operand[] sources = sourcesList.ToArray(); + Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)]; + + int outputIndex = 0; + + for (int i = 0; i < dests.Length; i++) + { + if (rdIndex + i >= RegisterConsts.RegisterZeroIndex) + { + break; + } + + dests[outputIndex++] = Register(rdIndex + i, RegisterType.Gpr); + } + + if (outputIndex != dests.Length) + { + Array.Resize(ref dests, outputIndex); + } + + int handle = !isBindless ? 
imm : 0; + + TextureOperation operation = context.CreateTextureOperation( + Instruction.TextureSample, + type, + flags, + handle, + componentMask, + dests, + sources); + + context.Add(operation); + } + + /* Emits an Instruction.TextureSample operation for the TEXS/TLDS/TLD4S family, selected by texsType. Operands are read from the register sequences starting at srcA and srcB; the component mask comes from _maskLut using writeMask and whether dest2 is the zero register. With isF16 the results are written to locals and packed as half pairs into dest and dest2. Nothing is emitted when both dest and dest2 are the zero register. */ + private static void EmitTexs( + EmitterContext context, + TexsType texsType, + int imm, + int writeMask, + int srcA, + int srcB, + int dest, + int dest2, + bool isF16) + { + if (dest == RegisterConsts.RegisterZeroIndex && dest2 == RegisterConsts.RegisterZeroIndex) + { + return; + } + + List<Operand> sourcesList = new List<Operand>(); + + /* Each call reads the next register of the srcA sequence; once the index is past the zero register a constant 0 is returned instead. */ + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + /* Same as Ra, for the srcB sequence. */ + Operand Rb() + { + if (srcB > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcB++, RegisterType.Gpr)); + } + + /* Reads one packed register from the srcB sequence and appends coordsCount signed fields of size bits each, taken every stride bits. */ + void AddTextureOffset(int coordsCount, int stride, int size) + { + Operand packedOffs = Rb(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(context.BitfieldExtractS32(packedOffs, Const(index * stride), Const(size))); + } + } + + SamplerType type; + TextureFlags flags; + + if (texsType == TexsType.Texs) + { + var texsOp = context.GetOp<InstTexs>(); + + type = ConvertSamplerType(texsOp.Target); + + if (type == SamplerType.None) + { + context.Config.GpuAccessor.Log("Invalid texture sampler type."); + return; + } + + flags = ConvertTextureFlags(texsOp.Target); + + // We don't need to handle 1D -> Buffer conversions here as + // only texture sample with integer coordinates can ever use buffer targets. 
+ + if ((type & SamplerType.Array) != 0) + { + Operand arrayIndex = Ra(); + + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + + sourcesList.Add(arrayIndex); + + if ((type & SamplerType.Shadow) != 0) + { + sourcesList.Add(Rb()); + } + + if ((flags & TextureFlags.LodLevel) != 0) + { + sourcesList.Add(ConstF(0)); + } + } + else + { + switch (texsOp.Target) + { + case TexsTarget.Texture1DLodZero: + sourcesList.Add(Ra()); + + if (Sample1DAs2D) + { + sourcesList.Add(ConstF(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + sourcesList.Add(ConstF(0)); + break; + + case TexsTarget.Texture2D: + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + break; + + case TexsTarget.Texture2DLodZero: + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + sourcesList.Add(ConstF(0)); + break; + + case TexsTarget.Texture2DLodLevel: + case TexsTarget.Texture2DDepthCompare: + case TexsTarget.Texture3D: + case TexsTarget.TextureCube: + sourcesList.Add(Ra()); + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + break; + + case TexsTarget.Texture2DLodZeroDepthCompare: + case TexsTarget.Texture3DLodZero: + sourcesList.Add(Ra()); + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + sourcesList.Add(ConstF(0)); + break; + + case TexsTarget.Texture2DLodLevelDepthCompare: + case TexsTarget.TextureCubeLodLevel: + sourcesList.Add(Ra()); + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + sourcesList.Add(Rb()); + break; + } + } + } + else if (texsType == TexsType.Tlds) + { + var tldsOp = context.GetOp<InstTlds>(); + + type = ConvertSamplerType(tldsOp.Target); + + if (type == SamplerType.None) + { + context.Config.GpuAccessor.Log("Invalid texel fetch sampler type."); + return; + } + + context.Config.SetUsedFeature(FeatureFlags.IntegerSampling); + + flags = ConvertTextureFlags(tldsOp.Target) | TextureFlags.IntCoords; + + if (tldsOp.Target == TldsTarget.Texture1DLodZero && + context.Config.GpuAccessor.QuerySamplerType(tldsOp.TidB) == SamplerType.TextureBuffer) + { + type = 
SamplerType.TextureBuffer; + flags &= ~TextureFlags.LodLevel; + } + + switch (tldsOp.Target) + { + case TldsTarget.Texture1DLodZero: + sourcesList.Add(Ra()); + + if (type != SamplerType.TextureBuffer) + { + if (Sample1DAs2D) + { + sourcesList.Add(ConstF(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + sourcesList.Add(ConstF(0)); + } + break; + + case TldsTarget.Texture1DLodLevel: + sourcesList.Add(Ra()); + + if (Sample1DAs2D) + { + sourcesList.Add(ConstF(0)); + + type &= ~SamplerType.Mask; + type |= SamplerType.Texture2D; + } + + sourcesList.Add(Rb()); + break; + + case TldsTarget.Texture2DLodZero: + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + sourcesList.Add(Const(0)); + break; + + case TldsTarget.Texture2DLodZeroOffset: + sourcesList.Add(Ra()); + sourcesList.Add(Ra()); + sourcesList.Add(Const(0)); + break; + + case TldsTarget.Texture2DLodZeroMultisample: + case TldsTarget.Texture2DLodLevel: + case TldsTarget.Texture2DLodLevelOffset: + sourcesList.Add(Ra()); + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + break; + + case TldsTarget.Texture3DLodZero: + sourcesList.Add(Ra()); + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + sourcesList.Add(Const(0)); + break; + + case TldsTarget.Texture2DArrayLodZero: + sourcesList.Add(Rb()); + sourcesList.Add(Rb()); + sourcesList.Add(Ra()); + sourcesList.Add(Const(0)); + break; + } + + if ((flags & TextureFlags.Offset) != 0) + { + AddTextureOffset(type.GetDimensions(), 4, 4); + } + } + else if (texsType == TexsType.Tld4s) + { + var tld4sOp = context.GetOp<InstTld4s>(); + + if (!(tld4sOp.Dc || tld4sOp.Aoffi)) + { + sourcesList.Add(Ra()); + sourcesList.Add(Rb()); + } + else + { + sourcesList.Add(Ra()); + sourcesList.Add(Ra()); + } + + type = SamplerType.Texture2D; + flags = TextureFlags.Gather; + + if (tld4sOp.Dc) + { + sourcesList.Add(Rb()); + + type |= SamplerType.Shadow; + } + + if (tld4sOp.Aoffi) + { + AddTextureOffset(type.GetDimensions(), 8, 6); + + flags |= TextureFlags.Offset; + } + + 
sourcesList.Add(Const((int)tld4sOp.TexComp)); + } + else + { + throw new ArgumentException($"Invalid TEXS type \"{texsType}\"."); + } + + Operand[] sources = sourcesList.ToArray(); + + /* Half-precision results default to constant 0 so that unwritten halves pack as zero. */ + Operand[] rd0 = new Operand[2] { ConstF(0), ConstF(0) }; + Operand[] rd1 = new Operand[2] { ConstF(0), ConstF(0) }; + + int handle = imm; + int componentMask = _maskLut[dest2 == RegisterConsts.RegisterZeroIndex ? 0 : 1, writeMask]; + + int componentsCount = BitOperations.PopCount((uint)componentMask); + + Operand[] dests = new Operand[componentsCount]; + + int outputIndex = 0; + + for (int i = 0; i < componentsCount; i++) + { + /* Components 0-1 belong to the first destination, components 2-3 to the second. */ + int high = i >> 1; + int low = i & 1; + + if (isF16) + { + dests[outputIndex++] = high != 0 + ? (rd1[low] = Local()) + : (rd0[low] = Local()); + } + else + { + int rdIndex = high != 0 ? dest2 : dest; + + /* The zero register is never advanced, so both components of a zero-register destination target it. */ + if (rdIndex < RegisterConsts.RegisterZeroIndex) + { + rdIndex += low; + } + + dests[outputIndex++] = Register(rdIndex, RegisterType.Gpr); + } + } + + if (outputIndex != dests.Length) + { + Array.Resize(ref dests, outputIndex); + } + + TextureOperation operation = context.CreateTextureOperation( + Instruction.TextureSample, + type, + flags, + handle, + componentMask, + dests, + sources); + + context.Add(operation); + + if (isF16) + { + context.Copy(Register(dest, RegisterType.Gpr), context.PackHalf2x16(rd0[0], rd0[1])); + context.Copy(Register(dest2, RegisterType.Gpr), context.PackHalf2x16(rd1[0], rd1[1])); + } + } + + /* Emits an Instruction.TextureSample operation with TextureFlags.Gather for the TLD4 family. Operands are read from the register sequences starting at srcA and srcB; results go to consecutive registers starting at dest, one per set bit of componentMask, stopping before the zero register. Nothing is emitted when dest is the zero register. */ + private static void EmitTld4( + EmitterContext context, + TexDim dimensions, + TexComp component, + int imm, + int componentMask, + int srcA, + int srcB, + int dest, + TexOffset offset, + bool hasDepthCompare, + bool isBindless) + { + if (dest == RegisterConsts.RegisterZeroIndex) + { + return; + } + + /* Each call reads the next register of the srcA sequence; once the index is past the zero register a constant 0 is returned instead. */ + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + Operand Rb() + { + if (srcB > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + 
} + + return context.Copy(Register(srcB++, RegisterType.Gpr)); + } + + bool isArray = + dimensions == TexDim.Array1d || + dimensions == TexDim.Array2d || + dimensions == TexDim.Array3d || + dimensions == TexDim.ArrayCube; + + Operand arrayIndex = isArray ? Ra() : null; + + List<Operand> sourcesList = new List<Operand>(); + + SamplerType type = ConvertSamplerType(dimensions); + TextureFlags flags = TextureFlags.Gather; + + if (isBindless) + { + sourcesList.Add(Rb()); + + flags |= TextureFlags.Bindless; + } + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D; + + if (is1DTo2D) + { + sourcesList.Add(ConstF(0)); + + type = SamplerType.Texture2D | (type & SamplerType.Array); + } + + if (isArray) + { + sourcesList.Add(arrayIndex); + } + + Operand[] packedOffs = new Operand[2]; + + bool hasAnyOffset = offset == TexOffset.Aoffi || offset == TexOffset.Ptp; + + packedOffs[0] = hasAnyOffset ? Rb() : null; + packedOffs[1] = offset == TexOffset.Ptp ? Rb() : null; + + if (hasDepthCompare) + { + sourcesList.Add(Rb()); + + type |= SamplerType.Shadow; + } + + if (hasAnyOffset) + { + int offsetTexelsCount = offset == TexOffset.Ptp ? 4 : 1; + + for (int index = 0; index < coordsCount * offsetTexelsCount; index++) + { + Operand packed = packedOffs[(index >> 2) & 1]; + + sourcesList.Add(context.BitfieldExtractS32(packed, Const((index & 3) * 8), Const(6))); + } + + if (is1DTo2D) + { + for (int index = 0; index < offsetTexelsCount; index++) + { + sourcesList.Add(Const(0)); + } + } + + flags |= offset == TexOffset.Ptp ? 
TextureFlags.Offsets : TextureFlags.Offset; + } + + sourcesList.Add(Const((int)component)); + + Operand[] sources = sourcesList.ToArray(); + Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)]; + + int outputIndex = 0; + + for (int i = 0; i < dests.Length; i++) + { + /* Stop before the zero register; the array is trimmed to the registers actually used below. */ + if (dest + i >= RegisterConsts.RegisterZeroIndex) + { + break; + } + + dests[outputIndex++] = Register(dest + i, RegisterType.Gpr); + } + + if (outputIndex != dests.Length) + { + Array.Resize(ref dests, outputIndex); + } + + int handle = imm; + + TextureOperation operation = context.CreateTextureOperation( + Instruction.TextureSample, + type, + flags, + handle, + componentMask, + dests, + sources); + + context.Add(operation); + } + + /* Emits the LOD query for the TMML family. Sources are the bindless handle read from the srcB sequence (bindless only), the coordinates read from the srcA sequence and the array index (array dimensions only); the array index is read from srcA before the coordinates but appended after them. Nothing is emitted when dest is the zero register. */ + private static void EmitTmml( + EmitterContext context, + TexDim dimensions, + int imm, + int componentMask, + int srcA, + int srcB, + int dest, + bool isBindless) + { + if (dest == RegisterConsts.RegisterZeroIndex) + { + return; + } + + /* Each call reads the next register of the srcA sequence; once the index is past the zero register a constant 0 is returned instead. */ + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + /* Same as Ra, for the srcB sequence. */ + Operand Rb() + { + if (srcB > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcB++, RegisterType.Gpr)); + } + + TextureFlags flags = TextureFlags.None; + + List<Operand> sourcesList = new List<Operand>(); + + if (isBindless) + { + sourcesList.Add(Rb()); + + flags |= TextureFlags.Bindless; + } + + SamplerType type = ConvertSamplerType(dimensions); + + int coordsCount = type.GetDimensions(); + + bool isArray = + dimensions == TexDim.Array1d || + dimensions == TexDim.Array2d || + dimensions == TexDim.Array3d || + dimensions == TexDim.ArrayCube; + + Operand arrayIndex = isArray ? 
Ra() : null; + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D) + { + sourcesList.Add(ConstF(0)); + + type = SamplerType.Texture2D | (type & SamplerType.Array); + } + + if (isArray) + { + sourcesList.Add(arrayIndex); + } + + Operand[] sources = sourcesList.ToArray(); + + Operand GetDest() + { + if (dest >= RegisterConsts.RegisterZeroIndex) + { + return null; + } + + return Register(dest++, RegisterType.Gpr); + } + + int handle = imm; + + for (int compMask = componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++) + { + if ((compMask & 1) != 0) + { + Operand destOperand = GetDest(); + + if (destOperand == null) + { + break; + } + + // Components z and w aren't standard, we return 0 in this case and add a comment. + if (compIndex >= 2) + { + context.Add(new CommentNode("Unsupported component z or w found")); + context.Copy(destOperand, Const(0)); + } + else + { + Operand tempDest = Local(); + + TextureOperation operation = context.CreateTextureOperation( + Instruction.Lod, + type, + flags, + handle, + compIndex ^ 1, // The instruction component order is the inverse of GLSL's. 
+ new[] { tempDest }, + sources); + + context.Add(operation); + + tempDest = context.FPMultiply(tempDest, ConstF(256.0f)); + + Operand fixedPointValue = context.FP32ConvertToS32(tempDest); + + context.Copy(destOperand, fixedPointValue); + } + } + } + } + + private static void EmitTxd( + EmitterContext context, + TexDim dimensions, + int imm, + int componentMask, + int srcA, + int srcB, + int dest, + bool hasOffset, + bool isBindless) + { + if (dest == RegisterConsts.RegisterZeroIndex) + { + return; + } + + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + Operand Rb() + { + if (srcB > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcB++, RegisterType.Gpr)); + } + + TextureFlags flags = TextureFlags.Derivatives; + + List<Operand> sourcesList = new List<Operand>(); + + if (isBindless) + { + sourcesList.Add(Ra()); + + flags |= TextureFlags.Bindless; + } + + SamplerType type = ConvertSamplerType(dimensions); + + int coordsCount = type.GetDimensions(); + + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(Ra()); + } + + bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D; + + if (is1DTo2D) + { + sourcesList.Add(ConstF(0)); + + type = SamplerType.Texture2D | (type & SamplerType.Array); + } + + Operand packedParams = Ra(); + + bool isArray = + dimensions == TexDim.Array1d || + dimensions == TexDim.Array2d || + dimensions == TexDim.Array3d || + dimensions == TexDim.ArrayCube; + + if (isArray) + { + sourcesList.Add(context.BitwiseAnd(packedParams, Const(0xffff))); + } + + // Derivatives (X and Y). 
+ for (int dIndex = 0; dIndex < 2 * coordsCount; dIndex++) + { + sourcesList.Add(Rb()); + + if (is1DTo2D) + { + sourcesList.Add(ConstF(0)); + } + } + + if (hasOffset) + { + for (int index = 0; index < coordsCount; index++) + { + sourcesList.Add(context.BitfieldExtractS32(packedParams, Const(16 + index * 4), Const(4))); + } + + if (is1DTo2D) + { + sourcesList.Add(Const(0)); + } + + flags |= TextureFlags.Offset; + } + + Operand[] sources = sourcesList.ToArray(); + Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)]; + + int outputIndex = 0; + + for (int i = 0; i < dests.Length; i++) + { + if (dest + i >= RegisterConsts.RegisterZeroIndex) + { + break; + } + + dests[outputIndex++] = Register(dest + i, RegisterType.Gpr); + } + + if (outputIndex != dests.Length) + { + Array.Resize(ref dests, outputIndex); + } + + int handle = imm; + + TextureOperation operation = context.CreateTextureOperation( + Instruction.TextureSample, + type, + flags, + handle, + componentMask, + dests, + sources); + + context.Add(operation); + } + + private static void EmitTxq( + EmitterContext context, + TexQuery query, + int imm, + int componentMask, + int srcA, + int dest, + bool isBindless) + { + if (dest == RegisterConsts.RegisterZeroIndex) + { + return; + } + + context.Config.SetUsedFeature(FeatureFlags.IntegerSampling); + + // TODO: Validate and use query. + Instruction inst = Instruction.TextureSize; + TextureFlags flags = isBindless ? 
TextureFlags.Bindless : TextureFlags.None; + + Operand Ra() + { + if (srcA > RegisterConsts.RegisterZeroIndex) + { + return Const(0); + } + + return context.Copy(Register(srcA++, RegisterType.Gpr)); + } + + List<Operand> sourcesList = new List<Operand>(); + + if (isBindless) + { + sourcesList.Add(Ra()); + } + + sourcesList.Add(Ra()); + + Operand[] sources = sourcesList.ToArray(); + + Operand GetDest() + { + if (dest >= RegisterConsts.RegisterZeroIndex) + { + return null; + } + + return Register(dest++, RegisterType.Gpr); + } + + SamplerType type; + + if (isBindless) + { + type = (componentMask & 4) != 0 ? SamplerType.Texture3D : SamplerType.Texture2D; + } + else + { + type = context.Config.GpuAccessor.QuerySamplerType(imm); + } + + for (int compMask = componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++) + { + if ((compMask & 1) != 0) + { + Operand destOperand = GetDest(); + + if (destOperand == null) + { + break; + } + + TextureOperation operation = context.CreateTextureOperation( + inst, + type, + flags, + imm, + compIndex, + new[] { destOperand }, + sources); + + context.Add(operation); + } + } + } + + private static SamplerType ConvertSamplerType(TexDim dimensions) + { + return dimensions switch + { + TexDim._1d => SamplerType.Texture1D, + TexDim.Array1d => SamplerType.Texture1D | SamplerType.Array, + TexDim._2d => SamplerType.Texture2D, + TexDim.Array2d => SamplerType.Texture2D | SamplerType.Array, + TexDim._3d => SamplerType.Texture3D, + TexDim.Array3d => SamplerType.Texture3D | SamplerType.Array, + TexDim.Cube => SamplerType.TextureCube, + TexDim.ArrayCube => SamplerType.TextureCube | SamplerType.Array, + _ => throw new ArgumentException($"Invalid texture dimensions \"{dimensions}\".") + }; + } + + private static SamplerType ConvertSamplerType(TexsTarget type) + { + switch (type) + { + case TexsTarget.Texture1DLodZero: + return SamplerType.Texture1D; + + case TexsTarget.Texture2D: + case TexsTarget.Texture2DLodZero: + case 
TexsTarget.Texture2DLodLevel: + return SamplerType.Texture2D; + + case TexsTarget.Texture2DDepthCompare: + case TexsTarget.Texture2DLodLevelDepthCompare: + case TexsTarget.Texture2DLodZeroDepthCompare: + return SamplerType.Texture2D | SamplerType.Shadow; + + case TexsTarget.Texture2DArray: + case TexsTarget.Texture2DArrayLodZero: + return SamplerType.Texture2D | SamplerType.Array; + + case TexsTarget.Texture2DArrayLodZeroDepthCompare: + return SamplerType.Texture2D | SamplerType.Array | SamplerType.Shadow; + + case TexsTarget.Texture3D: + case TexsTarget.Texture3DLodZero: + return SamplerType.Texture3D; + + case TexsTarget.TextureCube: + case TexsTarget.TextureCubeLodLevel: + return SamplerType.TextureCube; + } + + return SamplerType.None; + } + + private static SamplerType ConvertSamplerType(TldsTarget type) + { + switch (type) + { + case TldsTarget.Texture1DLodZero: + case TldsTarget.Texture1DLodLevel: + return SamplerType.Texture1D; + + case TldsTarget.Texture2DLodZero: + case TldsTarget.Texture2DLodZeroOffset: + case TldsTarget.Texture2DLodLevel: + case TldsTarget.Texture2DLodLevelOffset: + return SamplerType.Texture2D; + + case TldsTarget.Texture2DLodZeroMultisample: + return SamplerType.Texture2D | SamplerType.Multisample; + + case TldsTarget.Texture3DLodZero: + return SamplerType.Texture3D; + + case TldsTarget.Texture2DArrayLodZero: + return SamplerType.Texture2D | SamplerType.Array; + } + + return SamplerType.None; + } + + private static TextureFlags ConvertTextureFlags(TexsTarget type) + { + switch (type) + { + case TexsTarget.Texture1DLodZero: + case TexsTarget.Texture2DLodZero: + case TexsTarget.Texture2DLodLevel: + case TexsTarget.Texture2DLodLevelDepthCompare: + case TexsTarget.Texture2DLodZeroDepthCompare: + case TexsTarget.Texture2DArrayLodZero: + case TexsTarget.Texture2DArrayLodZeroDepthCompare: + case TexsTarget.Texture3DLodZero: + case TexsTarget.TextureCubeLodLevel: + return TextureFlags.LodLevel; + + case TexsTarget.Texture2D: + case 
TexsTarget.Texture2DDepthCompare: + case TexsTarget.Texture2DArray: + case TexsTarget.Texture3D: + case TexsTarget.TextureCube: + return TextureFlags.None; + } + + return TextureFlags.None; + } + + private static TextureFlags ConvertTextureFlags(TldsTarget type) + { + switch (type) + { + case TldsTarget.Texture1DLodZero: + case TldsTarget.Texture1DLodLevel: + case TldsTarget.Texture2DLodZero: + case TldsTarget.Texture2DLodLevel: + case TldsTarget.Texture2DLodZeroMultisample: + case TldsTarget.Texture3DLodZero: + case TldsTarget.Texture2DArrayLodZero: + return TextureFlags.LodLevel; + + case TldsTarget.Texture2DLodZeroOffset: + case TldsTarget.Texture2DLodLevelOffset: + return TextureFlags.LodLevel | TextureFlags.Offset; + } + + return TextureFlags.None; + } + } +}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs
new file mode 100644
index 00000000..2d84c5bd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs
@@ -0,0 +1,118 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the IR for VMAD: multiplies A by B, keeps the product as a low/high pair of
        /// 32-bit operands, adds C to it, optionally shifts the pair right (by 7 or 15) and
        /// optionally saturates, then copies the resulting low word to the destination register.
        /// </summary>
        /// <param name="context">Emitter context that provides the decoded instruction and receives the emitted IR</param>
        public static void Vmad(EmitterContext context)
        {
            InstVmad op = context.GetOp<InstVmad>();

            bool isASigned = (op.ASelect & VectorSelect.S8B0) != 0;
            bool isBSigned = (op.BSelect & VectorSelect.S8B0) != 0;

            Operand a = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);

            // INegate is told whether the mode is NegB, i.e. whether C has to be negated.
            Operand c = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB);
            Operand b;

            if (!op.BVideo)
            {
                int imm = op.Imm16;

                // With a signed B selector the 16-bit immediate is sign-extended to 32 bits.
                b = Const(isBSigned ? (imm << 16) >> 16 : imm);
            }
            else
            {
                b = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
            }

            Operand low = context.IMultiply(a, b);
            Operand high;

            if (isASigned != isBSigned)
            {
                // Mixed signedness: the unsigned operand multiplied by the sign mask of the
                // signed operand (arithmetic shift by 31) is added to the unsigned high word.
                Operand signedSrc = isASigned ? a : b;
                Operand unsignedSrc = isASigned ? b : a;

                Operand correction = context.IMultiply(unsignedSrc, context.ShiftRightS32(signedSrc, Const(31)));

                high = context.IAdd(correction, context.MultiplyHighU32(a, b));
            }
            else if (isASigned)
            {
                high = context.MultiplyHighS32(a, b);
            }
            else
            {
                high = context.MultiplyHighU32(a, b);
            }

            if (op.AvgMode == AvgMode.NegA)
            {
                (low, high) = InstEmitAluHelper.NegateLong(context, low, high);
            }

            // 64-bit accumulate: the carry out of the low word is added to the high word.
            low = InstEmitAluHelper.AddWithCarry(context, low, c, out Operand accumulateCarry);
            high = context.IAdd(high, accumulateCarry);

            if (op.AvgMode == AvgMode.PlusOne)
            {
                low = InstEmitAluHelper.AddWithCarry(context, low, Const(1), out Operand plusOneCarry);
                high = context.IAdd(high, plusOneCarry);
            }

            bool isResultSigned =
                op.ASelect == VectorSelect.S32 ||
                op.BSelect == VectorSelect.S32 ||
                op.AvgMode == AvgMode.NegB ||
                op.AvgMode == AvgMode.NegA;

            int scale = 0;

            if (op.VideoScale == VideoScale.Shr7)
            {
                scale = 7;
            }
            else if (op.VideoScale == VideoScale.Shr15)
            {
                scale = 15;
            }

            if (scale != 0)
            {
                // Right shift of the pair: the bits shifted out of the high word are
                // ORed into the top of the low word, then the high word itself is shifted.
                Operand shiftedLow = context.ShiftRightU32(low, Const(scale));
                Operand fromHigh = context.ShiftLeft(high, Const(32 - scale));

                low = context.BitwiseOr(shiftedLow, fromHigh);
                high = isResultSigned
                    ? context.ShiftRightS32(high, Const(scale))
                    : context.ShiftRightU32(high, Const(scale));
            }

            Operand result = low;

            if (op.Sat)
            {
                Operand signMask = context.ShiftRightS32(high, Const(31));

                if (isResultSigned)
                {
                    // Overflow is flagged when the high word differs from the sign extension of the low word.
                    Operand lowSign = context.ShiftRightS32(low, Const(31));
                    Operand overflowed = context.ICompareNotEqual(high, lowSign);
                    Operand limit = context.ConditionalSelect(signMask, Const(int.MinValue), Const(int.MaxValue));

                    result = context.ConditionalSelect(overflowed, limit, low);
                }
                else
                {
                    // Overflow is flagged when the high word is non-zero; the replacement value is
                    // the complement of the sign mask (all ones for a non-negative high word, zero otherwise).
                    Operand overflowed = context.ICompareNotEqual(high, Const(0));

                    result = context.ConditionalSelect(overflowed, context.BitwiseNot(signMask), low);
                }
            }

            context.Copy(GetDest(op.Dest), result);

            // TODO: CC.
        }
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs
new file mode 100644
index 00000000..67b185ab
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs
@@ -0,0 +1,183 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the IR for VMNMX: takes the minimum or maximum of A and B (maximum when
        /// <c>op.Mn</c> is set), optionally saturates it to the destination format, combines it
        /// with C according to <c>op.VideoOp</c> and copies the result to the destination register.
        /// </summary>
        /// <param name="context">Emitter context that provides the decoded instruction and receives the emitted IR</param>
        public static void Vmnmx(EmitterContext context)
        {
            InstVmnmx op = context.GetOp<InstVmnmx>();

            // Non-zero when the corresponding selector has the sign bit set.
            var aSign = op.ASelect & VectorSelect.S8B0;
            var bSign = op.BSelect & VectorSelect.S8B0;

            Operand a = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
            Operand c = GetSrcReg(context, op.SrcC);
            Operand b;

            if (!op.BVideo)
            {
                int imm = op.Imm16;

                // With a signed B selector the 16-bit immediate is sign-extended to 32 bits.
                b = Const(bSign != 0 ? (imm << 16) >> 16 : imm);
            }
            else
            {
                b = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
            }

            Operand result;

            bool isResultSigned;

            if (aSign == bSign)
            {
                // Ra and Rb have the same signedness, so doesn't matter which one we test.
                isResultSigned = aSign != 0;

                if (isResultSigned)
                {
                    result = op.Mn ? context.IMaximumS32(a, b) : context.IMinimumS32(a, b);
                }
                else
                {
                    result = op.Mn ? context.IMaximumU32(a, b) : context.IMinimumU32(a, b);
                }
            }
            else
            {
                // Signedness is different, but for max, result will always fit a U32,
                // since one of the inputs can't be negative, and the result is the one
                // with highest value. For min, it will always fit on a S32, since
                // one of the input can't be greater than INT_MAX and we want the lowest value.
                isResultSigned = !op.Mn;

                result = op.Mn ? context.IMaximumU32(a, b) : context.IMinimumS32(a, b);

                // The unsigned operand is the one whose selector lacks the sign bit. When it reads
                // as negative in a signed compare (value above INT_MAX) it is selected as the result.
                Operand unsignedSrc = aSign != 0 ? b : a;
                Operand isAboveIntMax = context.ICompareLess(unsignedSrc, Const(0));

                result = context.ConditionalSelect(isAboveIntMax, unsignedSrc, result);
            }

            if (op.Sat)
            {
                if (op.DFormat)
                {
                    if (!isResultSigned)
                    {
                        result = context.IMinimumU32(result, Const(int.MaxValue));
                    }
                }
                else if (isResultSigned)
                {
                    result = context.IMaximumS32(result, Const(0));
                }
            }

            // Any other video operation leaves the min/max result untouched.
            result = op.VideoOp switch
            {
                VideoOp.Acc => context.IAdd(result, c),
                VideoOp.Max => op.DFormat ? context.IMaximumS32(result, c) : context.IMaximumU32(result, c),
                VideoOp.Min => op.DFormat ? context.IMinimumS32(result, c) : context.IMinimumU32(result, c),
                VideoOp.Mrg16h => context.BitfieldInsert(c, result, Const(16), Const(16)),
                VideoOp.Mrg16l => context.BitfieldInsert(c, result, Const(0), Const(16)),
                VideoOp.Mrg8b0 => context.BitfieldInsert(c, result, Const(0), Const(8)),
                VideoOp.Mrg8b2 => context.BitfieldInsert(c, result, Const(16), Const(8)),
                _ => result
            };

            context.Copy(GetDest(op.Dest), result);
        }

        /// <summary>
        /// Emits the IR for VSETP: compares A with B using <c>op.VComp</c>, combines the comparison
        /// and its complement with the source predicate through <c>op.BoolOp</c>, and writes the two
        /// results to the destination predicate registers.
        /// </summary>
        /// <param name="context">Emitter context that provides the decoded instruction and receives the emitted IR</param>
        public static void Vsetp(EmitterContext context)
        {
            InstVsetp op = context.GetOp<InstVsetp>();

            bool isASigned = (op.ASelect & VectorSelect.S8B0) != 0;
            bool isBSigned = (op.BSelect & VectorSelect.S8B0) != 0;

            Operand a = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
            Operand b;

            if (!op.BVideo)
            {
                int imm = op.Imm16;

                // With a signed B selector the 16-bit immediate is sign-extended to 32 bits.
                b = Const(isBSigned ? (imm << 16) >> 16 : imm);
            }
            else
            {
                b = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
            }

            Operand p0Result;

            if (isASigned == isBSigned)
            {
                // Sign matches, just do a regular comparison.
                p0Result = GetIntComparison(context, op.VComp, a, b, isASigned, extended: false);
            }
            else
            {
                // Selector with the sign bit masked off equal to U32 means a full 32-bit operand.
                bool isA32 = (op.ASelect & ~VectorSelect.S8B0) == VectorSelect.U32;
                bool isB32 = (op.BSelect & ~VectorSelect.S8B0) == VectorSelect.U32;

                if (isA32 || isB32)
                {
                    // TODO: Mismatching sign case.
                    p0Result = Const(0);
                }
                else
                {
                    // Both values are extended small integer and can always fit in a S32, just do a signed comparison.
                    p0Result = GetIntComparison(context, op.VComp, a, b, isSigned: true, extended: false);
                }
            }

            Operand p1Result = context.BitwiseNot(p0Result);

            Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            p0Result = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p0Result, srcPred);
            p1Result = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p1Result, srcPred);

            context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Result);
            context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Result);
        }
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs
new file mode 100644
index 00000000..3c833613
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs
@@ -0,0 +1,84 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the IR for FSWZADD: a swizzle-add of A and B controlled by <c>op.PnWord</c>,
        /// written to the destination register, followed by the FP Z/N flag update.
        /// </summary>
        /// <param name="context">Emitter context that provides the decoded instruction and receives the emitted IR</param>
        public static void Fswzadd(EmitterContext context)
        {
            InstFswzadd op = context.GetOp<InstFswzadd>();

            Operand a = GetSrcReg(context, op.SrcA);
            Operand b = GetSrcReg(context, op.SrcB);
            Operand destination = GetDest(op.Dest);

            Operand sum = context.FPSwizzleAdd(a, b, op.PnWord);

            context.Copy(destination, sum);

            InstEmitAluHelper.SetFPZnFlags(context, destination, op.WriteCC);
        }

        /// <summary>
        /// Emits the IR for SHFL: runs the shuffle variant chosen by <c>op.ShflMode</c> and copies
        /// its two outputs to the destination register and to the destination predicate.
        /// </summary>
        /// <param name="context">Emitter context that provides the decoded instruction and receives the emitted IR</param>
        public static void Shfl(EmitterContext context)
        {
            InstShfl op = context.GetOp<InstShfl>();

            Operand destPred = Register(op.DestPred, RegisterType.Predicate);

            Operand a = GetSrcReg(context, op.SrcA);

            // B and C come from an immediate when the matching "fix" bit is set, otherwise from a register.
            Operand b = op.BFixShfl ? Const(op.SrcBImm) : GetSrcReg(context, op.SrcB);
            Operand c = op.CFixShfl ? Const(op.SrcCImm) : GetSrcReg(context, op.SrcC);

            // Both stay null for a mode that is not listed below.
            Operand result = null;
            Operand valid = null;

            switch (op.ShflMode)
            {
                case ShflMode.Idx:
                    (result, valid) = context.Shuffle(a, b, c);
                    break;
                case ShflMode.Up:
                    (result, valid) = context.ShuffleUp(a, b, c);
                    break;
                case ShflMode.Down:
                    (result, valid) = context.ShuffleDown(a, b, c);
                    break;
                case ShflMode.Bfly:
                    (result, valid) = context.ShuffleXor(a, b, c);
                    break;
            }

            context.Copy(GetDest(op.Dest), result);
            context.Copy(destPred, valid);
        }

        /// <summary>
        /// Emits the IR for VOTE: evaluates the vote selected by <c>op.VoteMode</c> on the source
        /// predicate into the destination predicate, and writes the ballot of that predicate to the
        /// destination register unless it is the zero register.
        /// </summary>
        /// <param name="context">Emitter context that provides the decoded instruction and receives the emitted IR</param>
        public static void Vote(EmitterContext context)
        {
            InstVote op = context.GetOp<InstVote>();

            Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            Operand voteResult = op.VoteMode switch
            {
                VoteMode.All => context.VoteAll(srcPred),
                VoteMode.Any => context.VoteAny(srcPred),
                VoteMode.Eq => context.VoteAllEqual(srcPred),
                _ => null
            };

            if (voteResult == null)
            {
                // Unknown mode: nothing is written to the predicate, the problem is only logged.
                context.Config.GpuAccessor.Log($"Invalid vote operation: {op.VoteMode}.");
            }
            else
            {
                context.Copy(Register(op.VpDest, RegisterType.Predicate), voteResult);
            }

            if (op.Dest == RegisterConsts.RegisterZeroIndex)
            {
                return;
            }

            context.Copy(GetDest(op.Dest), context.Ballot(srcPred));
        }
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs
new file mode 100644
index 00000000..91c740b6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs
@@ -0,0 +1,6 @@
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Signature of a method that takes an <see cref="EmitterContext"/> and returns nothing.
    /// </summary>
    /// <remarks>
    /// NOTE(review): presumably the shape shared by the per-instruction emit methods such as
    /// <c>InstEmit.Vmad</c> - confirm against the instruction table.
    /// </remarks>
    /// <param name="context">Emitter context handed to the method</param>
    internal delegate void InstEmitter(EmitterContext context);
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs b/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs
new file mode 100644
index 00000000..6217ce53
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs
@@ -0,0 +1,141 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Turns a three-input truth table (8-bit immediate) into a short sequence of bitwise IR operations.
    /// </summary>
    static class Lop3Expression
    {
        /// <summary>
        /// Truth tables with a known expression, written in terms of the inputs a, b and c.
        /// </summary>
        private enum TruthTable : byte
        {
            False = 0x00,         // false
            True = 0xff,          // true
            In = 0xf0,            // a
            And2 = 0xc0,          // a & b
            Or2 = 0xfc,           // a | b
            Xor2 = 0x3c,          // a ^ b
            And3 = 0x80,          // a & b & c
            Or3 = 0xfe,           // a | b | c
            XorAnd = 0x60,        // a & (b ^ c)
            XorOr = 0xf6,         // a | (b ^ c)
            OrAnd = 0xe0,         // a & (b | c)
            AndOr = 0xf8,         // a | (b & c)
            Onehot = 0x16,        // (a & !b & !c) | (!a & b & !c) | (!a & !b & c) - Only one value is true.
            Majority = 0xe8,      // Popcount(a, b, c) >= 2
            Gamble = 0x81,        // (a & b & c) | (!a & !b & !c) - All on or all off
            InverseGamble = 0x7e, // Inverse of Gamble
            Dot = 0x1a,           // a ^ (c | (a & b))
            Mux = 0xca,           // a ? b : c
            AndXor = 0x78,        // a ^ (b & c)
            OrXor = 0x1e,         // a ^ (b | c)
            Xor3 = 0x96,          // a ^ b ^ c
        }

        /// <summary>
        /// Searches for an expression that implements the truth table <paramref name="imm"/>.
        /// </summary>
        /// <remarks>
        /// The low six bits of the loop counter select which operand swaps (bits 0-2) and operand
        /// negations (bits 3-5) are applied before the permuted table, and then its complement, is
        /// looked up among the known expressions. The negations are emitted on
        /// <paramref name="context"/> as each candidate is built, including candidates that end up
        /// not matching.
        /// </remarks>
        /// <param name="context">Emitter context that receives the emitted IR</param>
        /// <param name="srcA">First input</param>
        /// <param name="srcB">Second input</param>
        /// <param name="srcC">Third input</param>
        /// <param name="imm">Truth table immediate</param>
        /// <returns>The operand holding the result, or null if no candidate matched</returns>
        public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm)
        {
            const int CandidatesCount = 0x40;

            const int SwapXy = 0x01;
            const int SwapXz = 0x02;
            const int SwapYz = 0x04;
            const int NegateX = 0x08;
            const int NegateY = 0x10;
            const int NegateZ = 0x20;

            for (int candidate = 0; candidate < CandidatesCount; candidate++)
            {
                TruthTable table = (TruthTable)imm;

                Operand x = srcA;
                Operand y = srcB;
                Operand z = srcC;

                if ((candidate & SwapXy) != 0)
                {
                    (x, y) = (y, x);
                    table = PermuteTable(table, 7, 6, 3, 2, 5, 4, 1, 0);
                }

                if ((candidate & SwapXz) != 0)
                {
                    (x, z) = (z, x);
                    table = PermuteTable(table, 7, 3, 5, 1, 6, 2, 4, 0);
                }

                if ((candidate & SwapYz) != 0)
                {
                    (y, z) = (z, y);
                    table = PermuteTable(table, 7, 5, 6, 4, 3, 1, 2, 0);
                }

                if ((candidate & NegateX) != 0)
                {
                    x = context.BitwiseNot(x);
                    table = PermuteTable(table, 3, 2, 1, 0, 7, 6, 5, 4);
                }

                if ((candidate & NegateY) != 0)
                {
                    y = context.BitwiseNot(y);
                    table = PermuteTable(table, 5, 4, 7, 6, 1, 0, 3, 2);
                }

                if ((candidate & NegateZ) != 0)
                {
                    z = context.BitwiseNot(z);
                    table = PermuteTable(table, 6, 7, 4, 5, 2, 3, 0, 1);
                }

                Operand direct = GetExpr(table, context, x, y, z);
                if (direct != null)
                {
                    return direct;
                }

                // Second chance: a known expression for the complemented table, negated afterwards.
                TruthTable complement = (TruthTable)((~(int)table) & 0xff);

                Operand inverted = GetExpr(complement, context, x, y, z);
                if (inverted != null)
                {
                    return context.BitwiseNot(inverted);
                }
            }

            return null;
        }

        /// <summary>
        /// Emits the expression associated with a known truth table.
        /// </summary>
        /// <param name="imm">Truth table to implement</param>
        /// <param name="context">Emitter context that receives the emitted IR</param>
        /// <param name="x">Input taking the role of a</param>
        /// <param name="y">Input taking the role of b</param>
        /// <param name="z">Input taking the role of c</param>
        /// <returns>The operand holding the result, or null if the table has no expression here</returns>
        private static Operand GetExpr(TruthTable imm, EmitterContext context, Operand x, Operand y, Operand z)
        {
            switch (imm)
            {
                case TruthTable.False:
                    return Const(0);
                case TruthTable.True:
                    return Const(-1);
                case TruthTable.In:
                    return x;
                case TruthTable.And2:
                    return context.BitwiseAnd(x, y);
                case TruthTable.Or2:
                    return context.BitwiseOr(x, y);
                case TruthTable.Xor2:
                    return context.BitwiseExclusiveOr(x, y);
                case TruthTable.And3:
                    return context.BitwiseAnd(x, context.BitwiseAnd(y, z));
                case TruthTable.Or3:
                    return context.BitwiseOr(x, context.BitwiseOr(y, z));
                case TruthTable.XorAnd:
                    return context.BitwiseAnd(x, context.BitwiseExclusiveOr(y, z));
                case TruthTable.XorOr:
                    return context.BitwiseOr(x, context.BitwiseExclusiveOr(y, z));
                case TruthTable.OrAnd:
                    return context.BitwiseAnd(x, context.BitwiseOr(y, z));
                case TruthTable.AndOr:
                    return context.BitwiseOr(x, context.BitwiseAnd(y, z));
                case TruthTable.Onehot:
                {
                    Operand xOrY = context.BitwiseOr(x, y);
                    Operand xAndY = context.BitwiseAnd(x, y);

                    return context.BitwiseExclusiveOr(xOrY, context.BitwiseOr(z, xAndY));
                }
                case TruthTable.Majority:
                {
                    Operand xOrY = context.BitwiseOr(x, y);
                    Operand xAndY = context.BitwiseAnd(x, y);

                    return context.BitwiseAnd(xOrY, context.BitwiseOr(z, xAndY));
                }
                case TruthTable.InverseGamble:
                {
                    Operand xXorY = context.BitwiseExclusiveOr(x, y);
                    Operand xXorZ = context.BitwiseExclusiveOr(x, z);

                    return context.BitwiseOr(xXorY, xXorZ);
                }
                case TruthTable.Dot:
                {
                    Operand xXorZ = context.BitwiseExclusiveOr(x, z);
                    Operand notYOrZ = context.BitwiseOr(context.BitwiseNot(y), z);

                    return context.BitwiseAnd(xXorZ, notYOrZ);
                }
                case TruthTable.Mux:
                {
                    Operand whenSet = context.BitwiseAnd(x, y);
                    Operand whenClear = context.BitwiseAnd(context.BitwiseNot(x), z);

                    return context.BitwiseOr(whenSet, whenClear);
                }
                case TruthTable.AndXor:
                    return context.BitwiseExclusiveOr(x, context.BitwiseAnd(y, z));
                case TruthTable.OrXor:
                    return context.BitwiseExclusiveOr(x, context.BitwiseOr(y, z));
                case TruthTable.Xor3:
                    return context.BitwiseExclusiveOr(x, context.BitwiseExclusiveOr(y, z));
                default:
                    return null;
            }
        }

        /// <summary>
        /// Rearranges the bits of a truth table: source bit N is moved to the position given by
        /// the <c>bitN</c> argument.
        /// </summary>
        /// <param name="imm">Truth table to permute</param>
        /// <param name="bit7">Destination position of source bit 7</param>
        /// <param name="bit6">Destination position of source bit 6</param>
        /// <param name="bit5">Destination position of source bit 5</param>
        /// <param name="bit4">Destination position of source bit 4</param>
        /// <param name="bit3">Destination position of source bit 3</param>
        /// <param name="bit2">Destination position of source bit 2</param>
        /// <param name="bit1">Destination position of source bit 1</param>
        /// <param name="bit0">Destination position of source bit 0</param>
        /// <returns>The permuted truth table</returns>
        private static TruthTable PermuteTable(TruthTable imm, int bit7, int bit6, int bit5, int bit4, int bit3, int bit2, int bit1, int bit0)
        {
            int source = (int)imm;

            int Move(int from, int to) => ((source >> from) & 1) << to;

            int permuted =
                Move(0, bit0) |
                Move(1, bit1) |
                Move(2, bit2) |
                Move(3, bit3) |
                Move(4, bit4) |
                Move(5, bit5) |
                Move(6, bit6) |
                Move(7, bit7);

            return (TruthTable)permuted;
        }
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs
new file mode 100644
index 00000000..2aca118b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs
@@ -0,0 +1,91 @@
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// A block of IR nodes with up to two successors (<see cref="Next"/> and <see cref="Branch"/>)
    /// and a list of predecessors that is kept in sync whenever a successor is assigned.
    /// </summary>
    class BasicBlock
    {
        private BasicBlock _next;
        private BasicBlock _branch;

        public int Index { get; set; }

        public LinkedList<INode> Operations { get; } = new LinkedList<INode>();

        /// <summary>
        /// Successor stored as "next". Assigning it unregisters this block from the previous
        /// successor's predecessors and registers it on the new one.
        /// </summary>
        public BasicBlock Next
        {
            get => _next;
            set => _next = AddSuccessor(_next, value);
        }

        /// <summary>
        /// Successor stored as "branch". Assigning it updates predecessor lists the same way as <see cref="Next"/>.
        /// </summary>
        public BasicBlock Branch
        {
            get => _branch;
            set => _branch = AddSuccessor(_branch, value);
        }

        public bool HasBranch => _branch != null;

        /// <summary>
        /// True for the block with index 0 and for any block that has at least one predecessor.
        /// </summary>
        public bool Reachable => Index == 0 || Predecessors.Count != 0;

        public List<BasicBlock> Predecessors { get; } = new List<BasicBlock>();

        public HashSet<BasicBlock> DominanceFrontiers { get; } = new HashSet<BasicBlock>();

        public BasicBlock ImmediateDominator { get; set; }

        /// <summary>
        /// Creates an empty block.
        /// </summary>
        public BasicBlock()
        {
        }

        /// <summary>
        /// Creates an empty block with the given index.
        /// </summary>
        /// <param name="index">Value stored in <see cref="Index"/></param>
        public BasicBlock(int index)
        {
            Index = index;
        }

        /// <summary>
        /// Moves this block from the predecessors of <paramref name="oldBlock"/> to the
        /// predecessors of <paramref name="newBlock"/>; either of them may be null.
        /// </summary>
        /// <returns><paramref name="newBlock"/>, so that the caller can store it</returns>
        private BasicBlock AddSuccessor(BasicBlock oldBlock, BasicBlock newBlock)
        {
            if (oldBlock != null)
            {
                oldBlock.Predecessors.Remove(this);
            }

            if (newBlock != null)
            {
                newBlock.Predecessors.Add(this);
            }

            return newBlock;
        }

        /// <summary>
        /// Gets the last node of the block.
        /// </summary>
        /// <returns>The last node, or null if the block has no nodes</returns>
        public INode GetLastOp() => Operations.Last?.Value;

        /// <summary>
        /// Adds a node at the end of the block, or right before the last node when that node is
        /// a control flow operation.
        /// </summary>
        /// <param name="node">Node to add</param>
        public void Append(INode node)
        {
            bool endsWithControlFlow = GetLastOp() is Operation last && IsControlFlowInst(last.Inst);

            if (!endsWithControlFlow)
            {
                Operations.AddLast(node);

                return;
            }

            Operations.AddBefore(Operations.Last, node);
        }

        /// <summary>
        /// Checks whether the instruction is one of Branch, BranchIfFalse, BranchIfTrue, Discard or Return.
        /// </summary>
        private static bool IsControlFlowInst(Instruction inst)
        {
            return inst == Instruction.Branch ||
                   inst == Instruction.BranchIfFalse ||
                   inst == Instruction.BranchIfTrue ||
                   inst == Instruction.Discard ||
                   inst == Instruction.Return;
        }
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/CommentNode.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/CommentNode.cs
new file mode 100644
index 00000000..d4d87b06
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/CommentNode.cs
@@ -0,0 +1,12 @@
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// An <see cref="Operation"/> with the <see cref="Instruction.Comment"/> instruction that carries a text.
    /// </summary>
    class CommentNode : Operation
    {
        /// <summary>
        /// Text of the comment.
        /// </summary>
        public string Comment { get; }

        /// <summary>
        /// Creates a comment node; the base operation is built with a null second argument.
        /// </summary>
        /// <param name="comment">Text of the comment</param>
        public CommentNode(string comment) : base(Instruction.Comment, null) => Comment = comment;
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Function.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Function.cs
new file mode 100644
index 00000000..e535c3fc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Function.cs
@@ -0,0 +1,23 @@
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// Read-only description of a function: its blocks, its name and its return/argument layout.
    /// </summary>
    class Function
    {
        public BasicBlock[] Blocks { get; }

        public string Name { get; }

        public bool ReturnsValue { get; }

        public int InArgumentsCount { get; }
        public int OutArgumentsCount { get; }

        /// <summary>
        /// Creates a function description; every argument is stored as-is in the property of the same name.
        /// </summary>
        /// <param name="blocks">Blocks of the function</param>
        /// <param name="name">Name of the function</param>
        /// <param name="returnsValue">Value for <see cref="ReturnsValue"/></param>
        /// <param name="inArgumentsCount">Value for <see cref="InArgumentsCount"/></param>
        /// <param name="outArgumentsCount">Value for <see cref="OutArgumentsCount"/></param>
        public Function(BasicBlock[] blocks, string name, bool returnsValue, int inArgumentsCount, int outArgumentsCount)
        {
            (Blocks, Name, ReturnsValue) = (blocks, name, returnsValue);
            (InArgumentsCount, OutArgumentsCount) = (inArgumentsCount, outArgumentsCount);
        }
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/INode.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/INode.cs
new file mode 100644
index 00000000..0f545e56
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/INode.cs
@@ -0,0 +1,15 @@
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// Contract for the nodes stored in a block's operation list: access to destination and
    /// source operands by index, plus a settable <see cref="Dest"/>.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the relationship between <see cref="Dest"/> and <c>GetDest(0)</c> is defined by
    /// the implementations, which are not part of this file - confirm before relying on it.
    /// </remarks>
    interface INode
    {
        // Destination side.
        Operand Dest { get; set; }

        int DestsCount { get; }

        Operand GetDest(int index);

        // Source side.
        int SourcesCount { get; }

        Operand GetSource(int index);

        void SetSource(int index, Operand operand);
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
new file mode 100644
index 00000000..d7c4a961
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
@@ -0,0 +1,178 @@
using System;

namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// IR instruction identifiers. The low 16 bits (<see cref="Mask"/>) hold the instruction
    /// itself, numbered sequentially from 1; <see cref="FP32"/> and <see cref="FP64"/> are
    /// single-bit modifiers stored above them.
    /// </summary>
    /// <remarks>
    /// NOTE(review): <see cref="FlagsAttribute"/> is applied although only FP32/FP64 are real bit
    /// flags, so the flag-style formatting of combined values is of limited use - confirm whether
    /// the attribute is relied upon anywhere.
    /// </remarks>
    [Flags]
    enum Instruction
    {
        Absolute = 1,
        Add,
        AtomicAdd,
        AtomicAnd,
        AtomicCompareAndSwap,
        AtomicMinS32,
        AtomicMinU32,
        AtomicMaxS32,
        AtomicMaxU32,
        AtomicOr,
        AtomicSwap,
        AtomicXor,
        Ballot,
        Barrier,
        BitCount,
        BitfieldExtractS32,
        BitfieldExtractU32,
        BitfieldInsert,
        BitfieldReverse,
        BitwiseAnd,
        BitwiseExclusiveOr,
        BitwiseNot,
        BitwiseOr,
        Branch,
        BranchIfFalse,
        BranchIfTrue,
        Call,
        Ceiling,
        Clamp,
        ClampU32,
        Comment,
        CompareEqual,
        CompareGreater,
        CompareGreaterOrEqual,
        CompareGreaterOrEqualU32,
        CompareGreaterU32,
        CompareLess,
        CompareLessOrEqual,
        CompareLessOrEqualU32,
        CompareLessU32,
        CompareNotEqual,
        ConditionalSelect,
        ConvertFP32ToFP64,
        ConvertFP64ToFP32,
        ConvertFP32ToS32,
        ConvertFP32ToU32,
        ConvertFP64ToS32,
        ConvertFP64ToU32,
        ConvertS32ToFP32,
        ConvertS32ToFP64,
        ConvertU32ToFP32,
        ConvertU32ToFP64,
        Copy,
        Cosine,
        Ddx,
        Ddy,
        Discard,
        Divide,
        EmitVertex,
        EndPrimitive,
        ExponentB2,
        FSIBegin,
        FSIEnd,
        FindLSB,
        FindMSBS32,
        FindMSBU32,
        Floor,
        FusedMultiplyAdd,
        GroupMemoryBarrier,
        ImageLoad,
        ImageStore,
        ImageAtomic,
        IsNan,
        Load,
        LoadConstant,
        LoadGlobal,
        LoadLocal,
        LoadShared,
        LoadStorage,
        Lod,
        LogarithmB2,
        LogicalAnd,
        LogicalExclusiveOr,
        LogicalNot,
        LogicalOr,
        LoopBreak,
        LoopContinue,
        MarkLabel,
        Maximum,
        MaximumU32,
        MemoryBarrier,
        Minimum,
        MinimumU32,
        Multiply,
        MultiplyHighS32,
        MultiplyHighU32,
        Negate,
        PackDouble2x32,
        PackHalf2x16,
        ReciprocalSquareRoot,
        Return,
        Round,
        ShiftLeft,
        ShiftRightS32,
        ShiftRightU32,
        Shuffle,
        ShuffleDown,
        ShuffleUp,
        ShuffleXor,
        Sine,
        SquareRoot,
        Store,
        StoreGlobal,
        StoreGlobal16,
        StoreGlobal8,
        StoreLocal,
        StoreShared,
        StoreShared16,
        StoreShared8,
        StoreStorage,
        StoreStorage16,
        StoreStorage8,
        Subtract,
        SwizzleAdd,
        TextureSample,
        TextureSize,
        Truncate,
        UnpackDouble2x32,
        UnpackHalf2x16,
        VectorExtract,
        VoteAll,
        VoteAllEqual,
        VoteAny,

        // One past the last sequential instruction.
        Count,

        // Modifier bits, stored above the instruction identifier.
        FP32 = 1 << 16,
        FP64 = 1 << 17,

        // Selects the instruction identifier, dropping the modifier bits.
        Mask = 0xffff
    }

    /// <summary>
    /// Classification helpers for <see cref="Instruction"/>; both ignore the bits outside <see cref="Instruction.Mask"/>.
    /// </summary>
    static class InstructionExtensions
    {
        /// <summary>
        /// Checks whether the instruction is one of the Atomic* instructions.
        /// </summary>
        /// <param name="inst">Instruction, with or without modifier bits</param>
        /// <returns>True for an atomic instruction, false otherwise</returns>
        public static bool IsAtomic(this Instruction inst)
        {
            return (inst & Instruction.Mask) switch
            {
                Instruction.AtomicAdd => true,
                Instruction.AtomicAnd => true,
                Instruction.AtomicCompareAndSwap => true,
                Instruction.AtomicMaxS32 => true,
                Instruction.AtomicMaxU32 => true,
                Instruction.AtomicMinS32 => true,
                Instruction.AtomicMinU32 => true,
                Instruction.AtomicOr => true,
                Instruction.AtomicSwap => true,
                Instruction.AtomicXor => true,
                _ => false
            };
        }

        /// <summary>
        /// Checks whether the instruction is <see cref="Instruction.Lod"/> or <see cref="Instruction.TextureSize"/>.
        /// </summary>
        /// <param name="inst">Instruction, with or without modifier bits</param>
        /// <returns>True for a texture query instruction, false otherwise</returns>
        public static bool IsTextureQuery(this Instruction inst)
        {
            Instruction baseInst = inst & Instruction.Mask;

            return baseInst == Instruction.Lod || baseInst == Instruction.TextureSize;
        }
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// Identifiers for shader input/output variables.
    /// Values are assigned sequentially starting at zero, in declaration order.
    /// </summary>
    enum IoVariable
    {
        Invalid,

        BackColorDiffuse,
        BackColorSpecular,
        BaseInstance,
        BaseVertex,
        ClipDistance,
        CtaId,
        DrawIndex,
        FogCoord,
        FragmentCoord,
        FragmentOutputColor,
        FragmentOutputDepth,
        FragmentOutputIsBgra, // TODO: Remove and use constant buffer access.
        FrontColorDiffuse,
        FrontColorSpecular,
        FrontFacing,
        InstanceId,
        InstanceIndex,
        InvocationId,
        Layer,
        PatchVertices,
        PointCoord,
        PointSize,
        Position,
        PrimitiveId,
        SubgroupEqMask,
        SubgroupGeMask,
        SubgroupGtMask,
        SubgroupLaneId,
        SubgroupLeMask,
        SubgroupLtMask,
        SupportBlockViewInverse, // TODO: Remove and use constant buffer access.
        SupportBlockRenderScale, // TODO: Remove and use constant buffer access.
        TessellationCoord,
        TessellationLevelInner,
        TessellationLevelOuter,
        TextureCoord,
        ThreadId,
        ThreadKill,
        UserDefined,
        VertexId,
        VertexIndex,
        ViewportIndex,
        ViewportMask
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IrConsts.cs
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// Integer encodings of the boolean values used by the intermediate representation.
    /// </summary>
    static class IrConsts
    {
        /// <summary>False is encoded as zero.</summary>
        public const int False = 0;

        /// <summary>True is encoded as -1, the bitwise complement of <see cref="False"/>.</summary>
        public const int True = ~False;
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operand.cs
using Ryujinx.Graphics.Shader.Decoders;
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// A value consumed or produced by an IR node.
    /// </summary>
    /// <remarks>
    /// <see cref="Value"/> is a single 32-bit integer whose interpretation depends on <see cref="Type"/>:
    /// for constant buffer operands the top <c>CbufSlotBits</c> bits hold the slot and the remaining
    /// bits hold the offset; for register operands the bits from 24 upwards hold the register type
    /// and the low 24 bits hold the register index.
    /// </remarks>
    class Operand
    {
        private const int CbufSlotBits = 5;
        private const int CbufSlotLsb = 32 - CbufSlotBits;
        private const int CbufSlotMask = (1 << CbufSlotBits) - 1;
        private const int CbufOffsetMask = ~(CbufSlotMask << CbufSlotLsb);

        private const int RegTypeLsb = 24;
        private const int RegIndexMask = (1 << RegTypeLsb) - 1;

        public OperandType Type { get; }

        public int Value { get; }

        /// <summary>
        /// Node that assigns this operand. Set by the nodes themselves.
        /// </summary>
        public INode AsgOp { get; set; }

        /// <summary>
        /// Nodes that registered themselves as using this operand as a source.
        /// </summary>
        public HashSet<INode> UseOps { get; }

        private Operand()
        {
            UseOps = new HashSet<INode>();
        }

        /// <summary>
        /// Creates an operand of the given type with a zero value.
        /// </summary>
        /// <param name="type">Operand type</param>
        public Operand(OperandType type) : this()
        {
            Type = type;
        }

        /// <summary>
        /// Creates an operand of the given type carrying a raw value.
        /// </summary>
        /// <param name="type">Operand type</param>
        /// <param name="value">Raw 32-bit value</param>
        public Operand(OperandType type, int value) : this()
        {
            Type = type;
            Value = value;
        }

        /// <summary>
        /// Creates a register operand.
        /// </summary>
        /// <param name="reg">Register whose index and type are packed into <see cref="Value"/></param>
        public Operand(Register reg) : this()
        {
            Type = OperandType.Register;
            Value = PackRegInfo(reg.Index, reg.Type);
        }

        /// <summary>
        /// Creates a constant buffer operand.
        /// </summary>
        /// <param name="slot">Constant buffer slot (only the low <c>CbufSlotBits</c> bits are kept)</param>
        /// <param name="offset">Offset inside the constant buffer (only the bits below the slot field are kept)</param>
        public Operand(int slot, int offset) : this()
        {
            Type = OperandType.ConstantBuffer;
            Value = PackCbufInfo(slot, offset);
        }

        private static int PackCbufInfo(int slot, int offset)
        {
            // Mask both fields so an out-of-range (or negative) offset can never spill into the
            // slot bits; this keeps GetCbufSlot/GetCbufOffset consistent with what was packed.
            return ((slot & CbufSlotMask) << CbufSlotLsb) | (offset & CbufOffsetMask);
        }

        private static int PackRegInfo(int index, RegisterType type)
        {
            // Mask the index for the same reason: GetRegister only reads the low 24 bits back.
            return ((int)type << RegTypeLsb) | (index & RegIndexMask);
        }

        /// <summary>
        /// Extracts the constant buffer slot from <see cref="Value"/>.
        /// </summary>
        /// <returns>Constant buffer slot</returns>
        public int GetCbufSlot()
        {
            return (Value >> CbufSlotLsb) & CbufSlotMask;
        }

        /// <summary>
        /// Extracts the constant buffer offset from <see cref="Value"/>.
        /// </summary>
        /// <returns>Constant buffer offset</returns>
        public int GetCbufOffset()
        {
            return Value & CbufOffsetMask;
        }

        /// <summary>
        /// Rebuilds the register packed into <see cref="Value"/>.
        /// </summary>
        /// <returns>Register with the unpacked index and type</returns>
        public Register GetRegister()
        {
            return new Register(Value & RegIndexMask, (RegisterType)(Value >> RegTypeLsb));
        }

        /// <summary>
        /// Reinterprets the bits of <see cref="Value"/> as a single precision float.
        /// </summary>
        /// <returns>Float with the same bit pattern as <see cref="Value"/></returns>
        public float AsFloat()
        {
            return BitConverter.Int32BitsToSingle(Value);
        }
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandHelper.cs
using Ryujinx.Graphics.Shader.Decoders;
using System;

namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// Factory methods for the different kinds of <see cref="Operand"/>.
    /// </summary>
    static class OperandHelper
    {
        /// <summary>Creates an <see cref="OperandType.Argument"/> operand with the given value.</summary>
        public static Operand Argument(int value) => new Operand(OperandType.Argument, value);

        /// <summary>Creates a constant buffer operand for the given slot and offset.</summary>
        public static Operand Cbuf(int slot, int offset) => new Operand(slot, offset);

        /// <summary>Creates an integer constant operand.</summary>
        public static Operand Const(int value) => new Operand(OperandType.Constant, value);

        /// <summary>Creates a constant operand holding the bit pattern of a float.</summary>
        public static Operand ConstF(float value)
        {
            int bits = BitConverter.SingleToInt32Bits(value);

            return new Operand(OperandType.Constant, bits);
        }

        /// <summary>Creates a new label operand.</summary>
        public static Operand Label() => new Operand(OperandType.Label);

        /// <summary>Creates a new local variable operand.</summary>
        public static Operand Local() => new Operand(OperandType.LocalVariable);

        /// <summary>Creates an operand for the register with the given index and type.</summary>
        public static Operand Register(int index, RegisterType type)
        {
            Register reg = new Register(index, type);

            return Register(reg);
        }

        /// <summary>
        /// Creates an operand for a register. Registers flagged as RZ become the constant 0 and
        /// registers flagged as PT become the constant <see cref="IrConsts.True"/>.
        /// </summary>
        public static Operand Register(Register reg)
        {
            if (reg.IsRZ)
            {
                return Const(0);
            }

            return reg.IsPT ? Const(IrConsts.True) : new Operand(reg);
        }

        /// <summary>Creates an undefined operand.</summary>
        public static Operand Undef() => new Operand(OperandType.Undefined);
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandType.cs
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// Kind of value an <see cref="Operand"/> represents.
    /// </summary>
    enum OperandType
    {
        Argument = 0,
        Constant = 1,
        ConstantBuffer = 2,
        Label = 3,
        LocalVariable = 4,
        Register = 5,
        Undefined = 6
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
using System;
using System.Diagnostics;

namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// An IR node that applies an <see cref="Instruction"/> to a list of source operands and writes
    /// zero or more destination operands.
    /// </summary>
    /// <remarks>
    /// The operation keeps the def/use links of local variables up to date: local variable sources get
    /// this node added to their <see cref="Operand.UseOps"/>, and destinations get their
    /// <see cref="Operand.AsgOp"/> pointed at this node.
    /// </remarks>
    class Operation : INode
    {
        public Instruction Inst { get; private set; }
        public StorageKind StorageKind { get; }

        private Operand[] _dests;

        /// <summary>
        /// First destination, or null when there is none. Setting it replaces the whole destination
        /// list with the single given operand (or clears the list when null is assigned).
        /// </summary>
        public Operand Dest
        {
            get
            {
                return _dests.Length != 0 ? _dests[0] : null;
            }
            set
            {
                if (value != null && value.Type == OperandType.LocalVariable)
                {
                    value.AsgOp = this;
                }

                if (value != null)
                {
                    _dests = new[] { value };
                }
                else
                {
                    _dests = Array.Empty<Operand>();
                }
            }
        }

        public int DestsCount => _dests.Length;

        private Operand[] _sources;

        public int SourcesCount => _sources.Length;

        public int Index { get; }

        private Operation(Operand[] sources)
        {
            // The array may be modified externally, so we store a copy.
            _sources = (Operand[])sources.Clone();

            for (int index = 0; index < _sources.Length; index++)
            {
                Operand source = _sources[index];

                if (source.Type == OperandType.LocalVariable)
                {
                    source.UseOps.Add(this);
                }
            }
        }

        /// <summary>
        /// Creates an operation with an arbitrary number of destinations.
        /// </summary>
        /// <param name="inst">Instruction to apply</param>
        /// <param name="index">Value stored in <see cref="Index"/></param>
        /// <param name="dests">Destination operands, or null for none; the array is copied</param>
        /// <param name="sources">Source operands; the array is copied</param>
        public Operation(Instruction inst, int index, Operand[] dests, Operand[] sources) : this(sources)
        {
            Inst = inst;
            Index = index;

            if (dests != null)
            {
                // The array may be modified externally, so we store a copy.
                _dests = (Operand[])dests.Clone();

                for (int dstIndex = 0; dstIndex < dests.Length; dstIndex++)
                {
                    Operand dest = dests[dstIndex];

                    if (dest != null && dest.Type == OperandType.LocalVariable)
                    {
                        dest.AsgOp = this;
                    }
                }
            }
            else
            {
                _dests = Array.Empty<Operand>();
            }
        }

        /// <summary>
        /// Creates an operation with at most one destination.
        /// </summary>
        /// <param name="inst">Instruction to apply</param>
        /// <param name="dest">Destination operand, or null for none</param>
        /// <param name="sources">Source operands; the array is copied</param>
        public Operation(Instruction inst, Operand dest, params Operand[] sources) : this(sources)
        {
            Inst = inst;

            if (dest != null)
            {
                // Note: unlike the other paths, the assignment link is set for any operand type here.
                dest.AsgOp = this;

                _dests = new[] { dest };
            }
            else
            {
                _dests = Array.Empty<Operand>();
            }
        }

        /// <summary>
        /// Creates an operation with at most one destination and an explicit storage kind.
        /// </summary>
        /// <param name="inst">Instruction to apply</param>
        /// <param name="storageKind">Value stored in <see cref="StorageKind"/></param>
        /// <param name="dest">Destination operand, or null for none</param>
        /// <param name="sources">Source operands; the array is copied</param>
        public Operation(Instruction inst, StorageKind storageKind, Operand dest, params Operand[] sources) : this(sources)
        {
            Inst = inst;
            StorageKind = storageKind;

            if (dest != null)
            {
                dest.AsgOp = this;

                _dests = new[] { dest };
            }
            else
            {
                _dests = Array.Empty<Operand>();
            }
        }

        /// <summary>
        /// Creates an operation with at most one destination and an explicit index.
        /// </summary>
        /// <param name="inst">Instruction to apply</param>
        /// <param name="index">Value stored in <see cref="Index"/></param>
        /// <param name="dest">Destination operand, or null for none</param>
        /// <param name="sources">Source operands; the array is copied</param>
        public Operation(Instruction inst, int index, Operand dest, params Operand[] sources) : this(inst, dest, sources)
        {
            Index = index;
        }

        /// <summary>
        /// Appends destinations to the end of the destination list.
        /// </summary>
        /// <param name="operands">Destinations to append; entries may be null</param>
        public void AppendDests(Operand[] operands)
        {
            int startIndex = _dests.Length;

            Array.Resize(ref _dests, startIndex + operands.Length);

            for (int index = 0; index < operands.Length; index++)
            {
                Operand dest = operands[index];

                if (dest != null && dest.Type == OperandType.LocalVariable)
                {
                    Debug.Assert(dest.AsgOp == null);
                    dest.AsgOp = this;
                }

                _dests[startIndex + index] = dest;
            }
        }

        /// <summary>
        /// Appends sources to the end of the source list.
        /// </summary>
        /// <param name="operands">Sources to append</param>
        public void AppendSources(Operand[] operands)
        {
            int startIndex = _sources.Length;

            Array.Resize(ref _sources, startIndex + operands.Length);

            for (int index = 0; index < operands.Length; index++)
            {
                Operand source = operands[index];

                if (source.Type == OperandType.LocalVariable)
                {
                    source.UseOps.Add(this);
                }

                _sources[startIndex + index] = source;
            }
        }

        public Operand GetDest(int index)
        {
            return _dests[index];
        }

        public Operand GetSource(int index)
        {
            return _sources[index];
        }

        /// <summary>
        /// Replaces the destination at the given index, unlinking the old local variable (if any)
        /// and linking the new one.
        /// </summary>
        /// <param name="index">Index of the destination to replace</param>
        /// <param name="dest">New destination, may be null</param>
        public void SetDest(int index, Operand dest)
        {
            Operand oldDest = _dests[index];

            if (oldDest != null && oldDest.Type == OperandType.LocalVariable)
            {
                oldDest.AsgOp = null;
            }

            if (dest != null && dest.Type == OperandType.LocalVariable)
            {
                dest.AsgOp = this;
            }

            _dests[index] = dest;
        }

        /// <summary>
        /// Replaces the source at the given index, updating the use lists of the old and new operands.
        /// </summary>
        /// <param name="index">Index of the source to replace</param>
        /// <param name="source">New source, may be null</param>
        public void SetSource(int index, Operand source)
        {
            Operand oldSrc = _sources[index];

            if (oldSrc != null && oldSrc.Type == OperandType.LocalVariable)
            {
                oldSrc.UseOps.Remove(this);
            }

            if (source != null && source.Type == OperandType.LocalVariable)
            {
                source.UseOps.Add(this);
            }

            _sources[index] = source;
        }

        /// <summary>
        /// Inserts a source at the given index, shifting the following sources up by one.
        /// </summary>
        /// <param name="index">Index where the source is inserted</param>
        /// <param name="source">Source to insert</param>
        public void InsertSource(int index, Operand source)
        {
            Operand[] newSources = new Operand[_sources.Length + 1];

            Array.Copy(_sources, 0, newSources, 0, index);
            Array.Copy(_sources, index, newSources, index + 1, _sources.Length - index);

            // Record the use only after the copies succeeded, so a bad index leaves no stale link.
            // Every other way of adding a source registers the use; without this, an inserted local
            // variable would not list this operation in its UseOps.
            if (source != null && source.Type == OperandType.LocalVariable)
            {
                source.UseOps.Add(this);
            }

            newSources[index] = source;

            _sources = newSources;
        }

        /// <summary>
        /// Removes the source at the given index, shifting the following sources down by one.
        /// </summary>
        /// <param name="index">Index of the source to remove</param>
        protected void RemoveSource(int index)
        {
            // Clearing the slot first drops the use link of the operand being removed.
            SetSource(index, null);

            Operand[] newSources = new Operand[_sources.Length - 1];

            Array.Copy(_sources, 0, newSources, 0, index);
            Array.Copy(_sources, index + 1, newSources, index, _sources.Length - (index + 1));

            _sources = newSources;
        }

        /// <summary>
        /// Turns this operation into a <see cref="Instruction.Copy"/> of the given source.
        /// </summary>
        /// <param name="source">Operand to copy from</param>
        public void TurnIntoCopy(Operand source)
        {
            TurnInto(Instruction.Copy, source);
        }

        /// <summary>
        /// Replaces the instruction and all sources with the given instruction and single source.
        /// </summary>
        /// <param name="newInst">New instruction</param>
        /// <param name="source">New, single source operand</param>
        public void TurnInto(Instruction newInst, Operand source)
        {
            Inst = newInst;

            foreach (Operand oldSrc in _sources)
            {
                if (oldSrc != null && oldSrc.Type == OperandType.LocalVariable)
                {
                    oldSrc.UseOps.Remove(this);
                }
            }

            if (source.Type == OperandType.LocalVariable)
            {
                source.UseOps.Add(this);
            }

            _sources = new Operand[] { source };
        }
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/PhiNode.cs
using System;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// IR node with a single destination and one source operand per basic block.
    /// </summary>
    class PhiNode : INode
    {
        private Operand _dest;

        /// <summary>
        /// Destination operand. Assigning a local variable also points its
        /// <see cref="Operand.AsgOp"/> at this node.
        /// </summary>
        public Operand Dest
        {
            get => _dest;
            set => _dest = AssignDest(value);
        }

        public int DestsCount => _dest == null ? 0 : 1;

        // Blocks that already contributed a source; used to reject duplicates in AddSource.
        private HashSet<BasicBlock> _blocks;

        private class PhiSource
        {
            public BasicBlock Block { get; }
            public Operand Operand { get; set; }

            public PhiSource(BasicBlock block, Operand operand)
            {
                Block = block;
                Operand = operand;
            }
        }

        private List<PhiSource> _sources;

        public int SourcesCount => _sources.Count;

        /// <summary>
        /// Creates a phi node writing to the given destination.
        /// </summary>
        /// <param name="dest">Destination operand</param>
        public PhiNode(Operand dest)
        {
            _blocks = new HashSet<BasicBlock>();

            _sources = new List<PhiSource>();

            dest.AsgOp = this;

            Dest = dest;
        }

        private Operand AssignDest(Operand dest)
        {
            bool isLocal = dest != null && dest.Type == OperandType.LocalVariable;

            if (isLocal)
            {
                dest.AsgOp = this;
            }

            return dest;
        }

        /// <summary>
        /// Adds a source for the given block. Nothing happens if the block already has a source.
        /// </summary>
        /// <param name="block">Block the source belongs to</param>
        /// <param name="operand">Source operand</param>
        public void AddSource(BasicBlock block, Operand operand)
        {
            if (!_blocks.Add(block))
            {
                return;
            }

            if (operand.Type == OperandType.LocalVariable)
            {
                operand.UseOps.Add(this);
            }

            _sources.Add(new PhiSource(block, operand));
        }

        /// <summary>
        /// Gets the destination; only index 0 is valid.
        /// </summary>
        /// <param name="index">Destination index, must be 0</param>
        /// <returns>The destination operand</returns>
        /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="index"/> is not 0</exception>
        public Operand GetDest(int index)
        {
            if (index == 0)
            {
                return _dest;
            }

            throw new ArgumentOutOfRangeException(nameof(index));
        }

        public Operand GetSource(int index) => _sources[index].Operand;

        public BasicBlock GetBlock(int index) => _sources[index].Block;

        /// <summary>
        /// Replaces the source operand at the given index, updating the use lists of both operands.
        /// </summary>
        /// <param name="index">Index of the source to replace</param>
        /// <param name="source">New source operand</param>
        public void SetSource(int index, Operand source)
        {
            PhiSource entry = _sources[index];

            Operand previous = entry.Operand;

            if (previous != null && previous.Type == OperandType.LocalVariable)
            {
                previous.UseOps.Remove(this);
            }

            if (source.Type == OperandType.LocalVariable)
            {
                source.UseOps.Add(this);
            }

            entry.Operand = source;
        }
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// Kind of storage an operation refers to.
    /// </summary>
    enum StorageKind
    {
        None,
        Input,
        InputPerPatch,
        Output,
        OutputPerPatch,
        ConstantBuffer,
        StorageBuffer,
        LocalMemory,
        SharedMemory,
        GlobalMemory
    }

    static class StorageKindExtensions
    {
        /// <summary>
        /// Checks if the storage kind is any of the input or output kinds (per-patch included).
        /// </summary>
        /// <param name="storageKind">Storage kind to check</param>
        /// <returns>True for Input, InputPerPatch, Output and OutputPerPatch</returns>
        public static bool IsInputOrOutput(this StorageKind storageKind)
        {
            return storageKind is
                StorageKind.Input or
                StorageKind.InputPerPatch or
                StorageKind.Output or
                StorageKind.OutputPerPatch;
        }

        /// <summary>
        /// Checks if the storage kind is one of the output kinds.
        /// </summary>
        /// <param name="storageKind">Storage kind to check</param>
        /// <returns>True for Output and OutputPerPatch</returns>
        public static bool IsOutput(this StorageKind storageKind)
        {
            return storageKind is StorageKind.Output or StorageKind.OutputPerPatch;
        }

        /// <summary>
        /// Checks if the storage kind is one of the per-patch kinds.
        /// </summary>
        /// <param name="storageKind">Storage kind to check</param>
        /// <returns>True for InputPerPatch and OutputPerPatch</returns>
        public static bool IsPerPatch(this StorageKind storageKind)
        {
            return storageKind is StorageKind.InputPerPatch or StorageKind.OutputPerPatch;
        }
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs
using System;

namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// Flags attached to a texture operation.
    /// </summary>
    /// <remarks>
    /// Bits 0 to 8 are independent flags. Bits 16 to 19 (<see cref="AtomicMask"/>) form a 4-bit field
    /// holding one of the values declared after it; note that <see cref="Add"/> shares the value 0
    /// with <see cref="None"/>.
    /// </remarks>
    [Flags]
    enum TextureFlags
    {
        None = 0,
        Bindless = 1 << 0,
        Gather = 1 << 1,
        Derivatives = 1 << 2,
        IntCoords = 1 << 3,
        LodBias = 1 << 4,
        LodLevel = 1 << 5,
        Offset = 1 << 6,
        Offsets = 1 << 7,
        Coherent = 1 << 8,

        AtomicMask = 15 << 16,

        Add = 0 << 16,
        Minimum = 1 << 16,
        Maximum = 2 << 16,
        Increment = 3 << 16,
        Decrement = 4 << 16,
        BitwiseAnd = 5 << 16,
        BitwiseOr = 6 << 16,
        BitwiseXor = 7 << 16,
        Swap = 8 << 16,
        CAS = 9 << 16
    }
}
// src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureOperation.cs
namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
    /// <summary>
    /// An <see cref="Operation"/> that additionally carries texture access information
    /// (sampler type, format, flags, constant buffer slot and handle).
    /// </summary>
    class TextureOperation : Operation
    {
        /// <summary>
        /// Constant buffer slot value used when no explicit slot is supplied.
        /// </summary>
        public const int DefaultCbufSlot = -1;

        public SamplerType Type { get; set; }
        public TextureFormat Format { get; set; }
        public TextureFlags Flags { get; private set; }

        public int CbufSlot { get; private set; }
        public int Handle { get; private set; }

        /// <summary>
        /// Creates a texture operation with an explicit constant buffer slot.
        /// </summary>
        /// <param name="inst">Instruction to apply</param>
        /// <param name="type">Sampler type</param>
        /// <param name="format">Texture format</param>
        /// <param name="flags">Texture flags</param>
        /// <param name="cbufSlot">Constant buffer slot</param>
        /// <param name="handle">Texture handle</param>
        /// <param name="compIndex">Forwarded to the base class as the operation index</param>
        /// <param name="dests">Destination operands</param>
        /// <param name="sources">Source operands</param>
        public TextureOperation(
            Instruction inst,
            SamplerType type,
            TextureFormat format,
            TextureFlags flags,
            int cbufSlot,
            int handle,
            int compIndex,
            Operand[] dests,
            Operand[] sources) : base(inst, compIndex, dests, sources)
        {
            Type = type;
            Format = format;
            Flags = flags;
            CbufSlot = cbufSlot;
            Handle = handle;
        }

        /// <summary>
        /// Creates a texture operation using <see cref="DefaultCbufSlot"/> as the constant buffer slot.
        /// </summary>
        /// <param name="inst">Instruction to apply</param>
        /// <param name="type">Sampler type</param>
        /// <param name="format">Texture format</param>
        /// <param name="flags">Texture flags</param>
        /// <param name="handle">Texture handle</param>
        /// <param name="compIndex">Forwarded to the base class as the operation index</param>
        /// <param name="dests">Destination operands</param>
        /// <param name="sources">Source operands</param>
        public TextureOperation(
            Instruction inst,
            SamplerType type,
            TextureFormat format,
            TextureFlags flags,
            int handle,
            int compIndex,
            Operand[] dests,
            Operand[] sources) : this(inst, type, format, flags, DefaultCbufSlot, handle, compIndex, dests, sources)
        {
        }

        /// <summary>
        /// Marks the sampler type as indexed, clears the bindless flag and replaces the handle.
        /// The sources are left untouched.
        /// </summary>
        /// <param name="handle">New texture handle</param>
        public void TurnIntoIndexed(int handle)
        {
            Type |= SamplerType.Indexed;
            Flags &= ~TextureFlags.Bindless;
            Handle = handle;
        }

        /// <summary>
        /// Sets the handle and constant buffer slot. If the operation was bindless, the bindless flag
        /// is cleared and the first source is removed.
        /// </summary>
        /// <param name="handle">New texture handle</param>
        /// <param name="cbufSlot">New constant buffer slot</param>
        public void SetHandle(int handle, int cbufSlot = DefaultCbufSlot)
        {
            bool wasBindless = (Flags & TextureFlags.Bindless) != 0;

            if (wasBindless)
            {
                Flags &= ~TextureFlags.Bindless;

                RemoveSource(0);
            }

            CbufSlot = cbufSlot;
            Handle = handle;
        }

        /// <summary>
        /// Adds <see cref="TextureFlags.LodLevel"/> to the flags.
        /// </summary>
        public void SetLodLevelFlag()
        {
            Flags |= TextureFlags.LodLevel;
        }
    }
}
// src/Ryujinx.Graphics.Shader/OutputTopology.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Output primitive topology.
    /// </summary>
    enum OutputTopology
    {
        PointList = 1,
        LineStrip = 6,
        TriangleStrip = 7
    }

    static class OutputTopologyExtensions
    {
        /// <summary>
        /// Gets the GLSL name of the topology.
        /// </summary>
        /// <param name="topology">Topology to convert</param>
        /// <returns>The GLSL name; any value not declared in the enum maps to "points"</returns>
        public static string ToGlslString(this OutputTopology topology)
        {
            return topology switch
            {
                OutputTopology.LineStrip => "line_strip",
                OutputTopology.TriangleStrip => "triangle_strip",
                // PointList and every unknown value share the same name.
                _ => "points"
            };
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj new file mode 100644 index 00000000..3434e2a8 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -0,0 +1,33 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <TargetFramework>net7.0</TargetFramework> + </PropertyGroup> + + <ItemGroup> + <None Remove="CodeGen\Glsl\HelperFunctions\TexelFetchScale_vp.glsl" /> + </ItemGroup> + + <ItemGroup> + <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" /> + <ProjectReference Include="..\Spv.Generator\Spv.Generator.csproj" /> + </ItemGroup> + + <ItemGroup> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Shared.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Storage.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreSharedSmallInt.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreStorageSmallInt.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_vp.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_fp.glsl" /> + <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_cp.glsl" /> + </ItemGroup> + +</Project> diff --git a/src/Ryujinx.Graphics.Shader/SamplerType.cs 
// src/Ryujinx.Graphics.Shader/SamplerType.cs
using Ryujinx.Graphics.Shader.Translation;
using System;

namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Sampler type: a base type stored in the low 8 bits (<see cref="Mask"/>), combined with
    /// modifier bits from bit 8 upwards.
    /// </summary>
    [Flags]
    public enum SamplerType
    {
        None = 0,
        Texture1D,
        TextureBuffer,
        Texture2D,
        Texture3D,
        TextureCube,

        Mask = 0xff,

        Array = 1 << 8,
        Indexed = 1 << 9,
        Multisample = 1 << 10,
        Shadow = 1 << 11
    }

    static class SamplerTypeExtensions
    {
        /// <summary>
        /// Gets the number of dimensions of the base sampler type.
        /// </summary>
        /// <param name="type">Sampler type; modifier bits are ignored</param>
        /// <returns>1, 2 or 3 depending on the base type</returns>
        /// <exception cref="ArgumentException">Thrown when the base type is not a known texture type</exception>
        public static int GetDimensions(this SamplerType type)
        {
            SamplerType baseType = type & SamplerType.Mask;

            switch (baseType)
            {
                case SamplerType.Texture1D:
                case SamplerType.TextureBuffer:
                    return 1;
                case SamplerType.Texture2D:
                    return 2;
                case SamplerType.Texture3D:
                case SamplerType.TextureCube:
                    return 3;
                default:
                    throw new ArgumentException($"Invalid sampler type \"{type}\".");
            }
        }

        /// <summary>
        /// Builds the GLSL sampler type name: the base name followed by "MS", "Array" and "Shadow"
        /// (in that order) for each modifier bit that is set.
        /// </summary>
        /// <param name="type">Sampler type</param>
        /// <returns>GLSL sampler type name</returns>
        /// <exception cref="ArgumentException">Thrown when the base type is not a known texture type</exception>
        public static string ToGlslSamplerType(this SamplerType type)
        {
            string baseName = (type & SamplerType.Mask) switch
            {
                SamplerType.Texture1D => "sampler1D",
                SamplerType.TextureBuffer => "samplerBuffer",
                SamplerType.Texture2D => "sampler2D",
                SamplerType.Texture3D => "sampler3D",
                SamplerType.TextureCube => "samplerCube",
                _ => throw new ArgumentException($"Invalid sampler type \"{type}\".")
            };

            string multisample = (type & SamplerType.Multisample) != 0 ? "MS" : "";
            string array = (type & SamplerType.Array) != 0 ? "Array" : "";
            string shadow = (type & SamplerType.Shadow) != 0 ? "Shadow" : "";

            return baseName + multisample + array + shadow;
        }

        /// <summary>
        /// Builds the GLSL image type name: an optional "u" or "i" prefix chosen from the component
        /// type, the base name, then "MS" and "Array" for each of those modifier bits that is set.
        /// </summary>
        /// <param name="type">Sampler type</param>
        /// <param name="componentType">Component type; U32 adds the "u" prefix, S32 adds "i", others add none</param>
        /// <returns>GLSL image type name</returns>
        /// <exception cref="ArgumentException">Thrown when the base type is not a known texture type</exception>
        public static string ToGlslImageType(this SamplerType type, AggregateType componentType)
        {
            string baseName = (type & SamplerType.Mask) switch
            {
                SamplerType.Texture1D => "image1D",
                SamplerType.TextureBuffer => "imageBuffer",
                SamplerType.Texture2D => "image2D",
                SamplerType.Texture3D => "image3D",
                SamplerType.TextureCube => "imageCube",
                _ => throw new ArgumentException($"Invalid sampler type \"{type}\".")
            };

            string multisample = (type & SamplerType.Multisample) != 0 ? "MS" : "";
            string array = (type & SamplerType.Array) != 0 ? "Array" : "";

            string prefix = componentType switch
            {
                AggregateType.U32 => "u",
                AggregateType.S32 => "i",
                _ => ""
            };

            return prefix + baseName + multisample + array;
        }
    }
}
// src/Ryujinx.Graphics.Shader/ShaderIdentification.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Identification attached to a shader program.
    /// </summary>
    public enum ShaderIdentification
    {
        None = 0,
        GeometryLayerPassthrough = 1
    }
}
// src/Ryujinx.Graphics.Shader/ShaderProgram.cs
using Ryujinx.Graphics.Shader.Translation;
using System;

namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// A shader program together with its info and target language. The program is held either as
    /// text (<see cref="Code"/>) or as bytes (<see cref="BinaryCode"/>), depending on the constructor used.
    /// </summary>
    public class ShaderProgram
    {
        public ShaderProgramInfo Info { get; }
        public TargetLanguage Language { get; }

        public string Code { get; private set; }
        public byte[] BinaryCode { get; }

        private ShaderProgram(ShaderProgramInfo info, TargetLanguage language)
        {
            Info = info;
            Language = language;
        }

        /// <summary>
        /// Creates a program from textual code.
        /// </summary>
        /// <param name="info">Program info</param>
        /// <param name="language">Target language</param>
        /// <param name="code">Textual program code</param>
        public ShaderProgram(ShaderProgramInfo info, TargetLanguage language, string code) : this(info, language)
        {
            Code = code;
        }

        /// <summary>
        /// Creates a program from binary code.
        /// </summary>
        /// <param name="info">Program info</param>
        /// <param name="language">Target language</param>
        /// <param name="binaryCode">Binary program code</param>
        public ShaderProgram(ShaderProgramInfo info, TargetLanguage language, byte[] binaryCode) : this(info, language)
        {
            BinaryCode = binaryCode;
        }

        /// <summary>
        /// Puts a line, followed by a line break, in front of <see cref="Code"/>.
        /// </summary>
        /// <param name="line">Line to prepend</param>
        public void Prepend(string line)
        {
            Code = string.Concat(line, Environment.NewLine, Code);
        }
    }
}
// src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
using System;
using System.Collections.ObjectModel;

namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Immutable description of a shader program: the resources it uses and a set of usage flags.
    /// </summary>
    public class ShaderProgramInfo
    {
        public ReadOnlyCollection<BufferDescriptor> CBuffers { get; }
        public ReadOnlyCollection<BufferDescriptor> SBuffers { get; }
        public ReadOnlyCollection<TextureDescriptor> Textures { get; }
        public ReadOnlyCollection<TextureDescriptor> Images { get; }

        public ShaderIdentification Identification { get; }
        public int GpLayerInputAttribute { get; }
        public ShaderStage Stage { get; }
        public bool UsesInstanceId { get; }
        public bool UsesDrawParameters { get; }
        public bool UsesRtLayer { get; }
        public byte ClipDistancesWritten { get; }
        public int FragmentOutputMap { get; }

        /// <summary>
        /// Creates the program info. The descriptor arrays are wrapped in read-only views
        /// rather than copied.
        /// </summary>
        /// <param name="cBuffers">Value exposed (read-only) as <see cref="CBuffers"/></param>
        /// <param name="sBuffers">Value exposed (read-only) as <see cref="SBuffers"/></param>
        /// <param name="textures">Value exposed (read-only) as <see cref="Textures"/></param>
        /// <param name="images">Value exposed (read-only) as <see cref="Images"/></param>
        /// <param name="identification">Value of <see cref="Identification"/></param>
        /// <param name="gpLayerInputAttribute">Value of <see cref="GpLayerInputAttribute"/></param>
        /// <param name="stage">Value of <see cref="Stage"/></param>
        /// <param name="usesInstanceId">Value of <see cref="UsesInstanceId"/></param>
        /// <param name="usesDrawParameters">Value of <see cref="UsesDrawParameters"/></param>
        /// <param name="usesRtLayer">Value of <see cref="UsesRtLayer"/></param>
        /// <param name="clipDistancesWritten">Value of <see cref="ClipDistancesWritten"/></param>
        /// <param name="fragmentOutputMap">Value of <see cref="FragmentOutputMap"/></param>
        public ShaderProgramInfo(
            BufferDescriptor[] cBuffers,
            BufferDescriptor[] sBuffers,
            TextureDescriptor[] textures,
            TextureDescriptor[] images,
            ShaderIdentification identification,
            int gpLayerInputAttribute,
            ShaderStage stage,
            bool usesInstanceId,
            bool usesDrawParameters,
            bool usesRtLayer,
            byte clipDistancesWritten,
            int fragmentOutputMap)
        {
            // Resource descriptors.
            CBuffers = Array.AsReadOnly(cBuffers);
            SBuffers = Array.AsReadOnly(sBuffers);
            Textures = Array.AsReadOnly(textures);
            Images = Array.AsReadOnly(images);

            // Identification and stage.
            Identification = identification;
            GpLayerInputAttribute = gpLayerInputAttribute;
            Stage = stage;

            // Usage flags and output information.
            UsesInstanceId = usesInstanceId;
            UsesDrawParameters = usesDrawParameters;
            UsesRtLayer = usesRtLayer;
            ClipDistancesWritten = clipDistancesWritten;
            FragmentOutputMap = fragmentOutputMap;
        }
    }
}
// src/Ryujinx.Graphics.Shader/ShaderStage.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Shader stage. <see cref="Count"/> is the number of stages declared before it.
    /// </summary>
    public enum ShaderStage : byte
    {
        Compute,
        Vertex,
        TessellationControl,
        TessellationEvaluation,
        Geometry,
        Fragment,

        Count
    }

    public static class ShaderStageExtensions
    {
        /// <summary>
        /// Checks if the shader stage supports render scale.
        /// </summary>
        /// <param name="stage">Shader stage</param>
        /// <returns>True if the shader stage supports render scale, false otherwise</returns>
        public static bool SupportsRenderScale(this ShaderStage stage)
        {
            switch (stage)
            {
                case ShaderStage.Vertex:
                case ShaderStage.Fragment:
                case ShaderStage.Compute:
                    return true;
                default:
                    return false;
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstAssignment.cs
using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// AST node assigning <see cref="Source"/> to <see cref="Destination"/>.
    /// </summary>
    class AstAssignment : AstNode
    {
        public IAstNode Destination { get; }

        private IAstNode _source;

        /// <summary>
        /// Value being assigned. Changing it removes this node from the uses of the previous source
        /// and adds it to the uses of the new one.
        /// </summary>
        public IAstNode Source
        {
            get => _source;
            set
            {
                RemoveUse(_source, this);

                AddUse(value, this);

                _source = value;
            }
        }

        /// <summary>
        /// Creates the assignment and registers it as a use of the source and a definition of the destination.
        /// </summary>
        /// <param name="destination">Node being assigned to</param>
        /// <param name="source">Node providing the value</param>
        public AstAssignment(IAstNode destination, IAstNode source)
        {
            Destination = destination;

            // Goes through the property setter, which registers the use.
            Source = source;

            AddDef(destination, this);
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// AST node holding an ordered list of child nodes, with a block type and an optional condition.
    /// </summary>
    class AstBlock : AstNode, IEnumerable<IAstNode>
    {
        public AstBlockType Type { get; private set; }

        private IAstNode _condition;

        /// <summary>
        /// Block condition. Changing it removes this block from the uses of the previous condition
        /// and adds it to the uses of the new one.
        /// </summary>
        public IAstNode Condition
        {
            get
            {
                return _condition;
            }
            set
            {
                RemoveUse(_condition, this);

                AddUse(value, this);

                _condition = value;
            }
        }

        private readonly LinkedList<IAstNode> _nodes;

        public IAstNode First => _nodes.First?.Value;
        public IAstNode Last => _nodes.Last?.Value;

        public int Count => _nodes.Count;

        /// <summary>
        /// Creates an empty block.
        /// </summary>
        /// <param name="type">Block type</param>
        /// <param name="condition">Optional block condition</param>
        public AstBlock(AstBlockType type, IAstNode condition = null)
        {
            Type = type;
            Condition = condition;

            _nodes = new LinkedList<IAstNode>();
        }

        /// <summary>
        /// Appends a node at the end of the block.
        /// </summary>
        /// <param name="node">Node to add; must not belong to a block already</param>
        /// <exception cref="ArgumentException">Thrown when the node already belongs to a block</exception>
        public void Add(IAstNode node)
        {
            EnsureDetached(node);

            Attach(node, _nodes.AddLast(node));
        }

        /// <summary>
        /// Inserts a node at the start of the block.
        /// </summary>
        /// <param name="node">Node to add; must not belong to a block already</param>
        /// <exception cref="ArgumentException">Thrown when the node already belongs to a block</exception>
        public void AddFirst(IAstNode node)
        {
            EnsureDetached(node);

            Attach(node, _nodes.AddFirst(node));
        }

        /// <summary>
        /// Inserts a node right before another node of this block.
        /// </summary>
        /// <param name="next">Existing node that will follow the new one</param>
        /// <param name="node">Node to add; must not belong to a block already</param>
        /// <exception cref="ArgumentException">Thrown when the node already belongs to a block</exception>
        public void AddBefore(IAstNode next, IAstNode node)
        {
            EnsureDetached(node);

            Attach(node, _nodes.AddBefore(next.LLNode, node));
        }

        /// <summary>
        /// Inserts a node right after another node of this block.
        /// </summary>
        /// <param name="prev">Existing node that will precede the new one</param>
        /// <param name="node">Node to add; must not belong to a block already</param>
        /// <exception cref="ArgumentException">Thrown when the node already belongs to a block</exception>
        public void AddAfter(IAstNode prev, IAstNode node)
        {
            EnsureDetached(node);

            Attach(node, _nodes.AddAfter(prev.LLNode, node));
        }

        // Validates the node BEFORE the list is touched, so a rejected node never
        // leaves a stray entry behind in _nodes.
        private static void EnsureDetached(IAstNode node)
        {
            if (node.Parent != null)
            {
                throw new ArgumentException("Node already belongs to a block.");
            }
        }

        // Links the node to this block and to its list entry.
        private void Attach(IAstNode node, LinkedListNode<IAstNode> newNode)
        {
            node.Parent = this;
            node.LLNode = newNode;
        }

        /// <summary>
        /// Removes a node from the block and clears its parent and list links.
        /// </summary>
        /// <param name="node">Node to remove</param>
        public void Remove(IAstNode node)
        {
            _nodes.Remove(node.LLNode);

            node.Parent = null;
            node.LLNode = null;
        }

        /// <summary>
        /// Replaces the condition with the logical AND of the current condition and the given one.
        /// </summary>
        /// <param name="cond">Condition to combine with</param>
        public void AndCondition(IAstNode cond)
        {
            Condition = new AstOperation(Instruction.LogicalAnd, Condition, cond);
        }

        /// <summary>
        /// Replaces the condition with the logical OR of the current condition and the given one.
        /// </summary>
        /// <param name="cond">Condition to combine with</param>
        public void OrCondition(IAstNode cond)
        {
            Condition = new AstOperation(Instruction.LogicalOr, Condition, cond);
        }

        /// <summary>
        /// Changes the block type to <see cref="AstBlockType.If"/> with the given condition.
        /// </summary>
        /// <param name="cond">New block condition</param>
        public void TurnIntoIf(IAstNode cond)
        {
            Condition = cond;

            Type = AstBlockType.If;
        }

        /// <summary>
        /// Changes the block type to <see cref="AstBlockType.ElseIf"/>.
        /// </summary>
        public void TurnIntoElseIf()
        {
            Type = AstBlockType.ElseIf;
        }

        public IEnumerator<IAstNode> GetEnumerator()
        {
            return _nodes.GetEnumerator();
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockType.cs
namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Type of an <see cref="AstBlock"/>.
    /// </summary>
    enum AstBlockType
    {
        DoWhile = 0,
        If = 1,
        Else = 2,
        ElseIf = 3,
        Main = 4,
        While = 5
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockVisitor.cs
using System;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Walks the nodes of a block tree in order, descending into child blocks and raising
    /// events when a block is entered or left.
    /// </summary>
    class AstBlockVisitor
    {
        /// <summary>
        /// Block currently being visited. Updated while <see cref="Visit"/> is enumerated.
        /// </summary>
        public AstBlock Block { get; private set; }

        public class BlockVisitationEventArgs : EventArgs
        {
            public AstBlock Block { get; }

            public BlockVisitationEventArgs(AstBlock block)
            {
                Block = block;
            }
        }

        public event EventHandler<BlockVisitationEventArgs> BlockEntered;
        public event EventHandler<BlockVisitationEventArgs> BlockLeft;

        /// <summary>
        /// Creates a visitor starting at the given block.
        /// </summary>
        /// <param name="mainBlock">Block where the visit starts</param>
        public AstBlockVisitor(AstBlock mainBlock)
        {
            Block = mainBlock;
        }

        /// <summary>
        /// Lazily enumerates every non-block node reachable from the starting block.
        /// Child blocks themselves are not yielded; they are reported through
        /// <see cref="BlockEntered"/> and <see cref="BlockLeft"/> instead.
        /// </summary>
        /// <returns>Sequence of visited nodes</returns>
        public IEnumerable<IAstNode> Visit()
        {
            for (IAstNode current = Block.First; current != null;)
            {
                // We reached a child block, visit the nodes inside.
                while (current is AstBlock inner)
                {
                    Block = inner;

                    current = inner.First;

                    BlockEntered?.Invoke(this, new BlockVisitationEventArgs(Block));
                }

                // Node may be null, if the block is empty.
                if (current != null)
                {
                    // The successor is read before yielding, so the consumer
                    // sees the node only after its follower has been captured.
                    IAstNode following = Next(current);

                    yield return current;

                    current = following;
                }

                // We reached the end of the list, go up on tree to the parent blocks.
                while (current == null && Block.Type != AstBlockType.Main)
                {
                    BlockLeft?.Invoke(this, new BlockVisitationEventArgs(Block));

                    current = Next(Block);

                    Block = Block.Parent;
                }
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstComment.cs
namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// AST node carrying a comment string.
    /// </summary>
    class AstComment : AstNode
    {
        /// <summary>
        /// Comment text supplied at construction.
        /// </summary>
        public string Comment { get; }

        /// <summary>
        /// Creates a comment node.
        /// </summary>
        /// <param name="comment">Comment text</param>
        public AstComment(string comment) => Comment = comment;
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstHelper.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Small helpers for building AST nodes and for maintaining the use/def
    /// sets of local variable operands.
    /// </summary>
    static class AstHelper
    {
        /// <summary>
        /// Records <paramref name="parent"/> as a user of <paramref name="node"/>.
        /// Does nothing unless the node is a local variable operand.
        /// </summary>
        public static void AddUse(IAstNode node, IAstNode parent)
        {
            if (node is AstOperand { Type: OperandType.LocalVariable } local)
            {
                local.Uses.Add(parent);
            }
        }

        /// <summary>
        /// Records <paramref name="parent"/> as a definition of <paramref name="node"/>.
        /// Does nothing unless the node is a local variable operand.
        /// </summary>
        public static void AddDef(IAstNode node, IAstNode parent)
        {
            if (node is AstOperand { Type: OperandType.LocalVariable } local)
            {
                local.Defs.Add(parent);
            }
        }

        /// <summary>
        /// Removes <paramref name="parent"/> from the users of <paramref name="node"/>.
        /// Does nothing unless the node is a local variable operand.
        /// </summary>
        public static void RemoveUse(IAstNode node, IAstNode parent)
        {
            if (node is AstOperand { Type: OperandType.LocalVariable } local)
            {
                local.Uses.Remove(parent);
            }
        }

        /// <summary>
        /// Removes <paramref name="parent"/> from the definitions of <paramref name="node"/>.
        /// Does nothing unless the node is a local variable operand.
        /// </summary>
        public static void RemoveDef(IAstNode node, IAstNode parent)
        {
            if (node is AstOperand { Type: OperandType.LocalVariable } local)
            {
                local.Defs.Remove(parent);
            }
        }

        /// <summary>Creates an assignment of <paramref name="source"/> to <paramref name="destination"/>.</summary>
        public static AstAssignment Assign(IAstNode destination, IAstNode source)
            => new AstAssignment(destination, source);

        /// <summary>Creates a constant operand holding <paramref name="value"/>.</summary>
        public static AstOperand Const(int value)
            => new AstOperand(OperandType.Constant, value);

        /// <summary>Creates a new local variable operand whose variable type is <paramref name="type"/>.</summary>
        public static AstOperand Local(AggregateType type)
            => new AstOperand(OperandType.LocalVariable) { VarType = type };

        /// <summary>Wraps <paramref name="cond"/> in a logical-not operation.</summary>
        public static IAstNode InverseCond(IAstNode cond)
            => new AstOperation(Instruction.LogicalNot, cond);

        /// <summary>
        /// Returns the node following <paramref name="node"/> in its linked list,
        /// or null when it is the last one.
        /// </summary>
        public static IAstNode Next(IAstNode node) => node.LLNode.Next?.Value;

        /// <summary>
        /// Returns the node preceding <paramref name="node"/> in its linked list,
        /// or null when it is the first one.
        /// </summary>
        public static IAstNode Previous(IAstNode node) => node.LLNode.Previous?.Value;
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstNode.cs
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Plain implementation of <see cref="IAstNode"/> that simply stores
    /// the two values required by the interface.
    /// </summary>
    class AstNode : IAstNode
    {
        /// <summary>Linked list node associated with this AST node.</summary>
        public LinkedListNode<IAstNode> LLNode { get; set; }

        /// <summary>Block recorded as the parent of this node.</summary>
        public AstBlock Parent { get; set; }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstOperand.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Operand node of the structured AST.
    /// </summary>
    class AstOperand : AstNode
    {
        /// <summary>Nodes registered as definitions of this operand. Starts empty.</summary>
        public HashSet<IAstNode> Defs { get; } = new HashSet<IAstNode>();

        /// <summary>Nodes registered as users of this operand. Starts empty.</summary>
        public HashSet<IAstNode> Uses { get; } = new HashSet<IAstNode>();

        /// <summary>Kind of operand.</summary>
        public OperandType Type { get; }

        /// <summary>Variable type; <see cref="AggregateType.S32"/> until changed.</summary>
        public AggregateType VarType { get; set; } = AggregateType.S32;

        /// <summary>Operand value. Left at zero for constant buffer operands built from an IR operand.</summary>
        public int Value { get; }

        /// <summary>Constant buffer slot; only filled for constant buffer operands built from an IR operand.</summary>
        public int CbufSlot { get; }

        /// <summary>Constant buffer offset; only filled for constant buffer operands built from an IR operand.</summary>
        public int CbufOffset { get; }

        /// <summary>
        /// Builds an AST operand from an IR operand, copying either the
        /// constant buffer slot/offset or the plain value depending on its type.
        /// </summary>
        public AstOperand(Operand operand)
        {
            Type = operand.Type;

            if (Type != OperandType.ConstantBuffer)
            {
                Value = operand.Value;

                return;
            }

            CbufSlot = operand.GetCbufSlot();
            CbufOffset = operand.GetCbufOffset();
        }

        /// <summary>Builds an operand of the given type and value.</summary>
        public AstOperand(OperandType type, int value = 0)
        {
            Type = type;
            Value = value;
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstOperation.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Numerics;

using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// AST node for an instruction applied to a list of operand nodes.
    /// </summary>
    class AstOperation : AstNode
    {
        private readonly IAstNode[] _sources;

        public Instruction Inst { get; }
        public StorageKind StorageKind { get; }

        /// <summary>
        /// Index value attached to the operation; zero unless given to the constructor.
        /// <see cref="GetVectorType"/> counts its set bits.
        /// </summary>
        public int Index { get; }

        /// <summary>Length of the operand array handed to the constructor.</summary>
        public int SourcesCount => _sources.Length;

        /// <summary>
        /// Creates the operation and registers it on its operands: the first
        /// <paramref name="sourcesCount"/> entries of <paramref name="sources"/> are
        /// registered as uses, any remaining entries as definitions.
        /// </summary>
        public AstOperation(Instruction inst, StorageKind storageKind, IAstNode[] sources, int sourcesCount)
        {
            Inst = inst;
            StorageKind = storageKind;
            _sources = sources;
            Index = 0;

            for (int position = 0; position < sources.Length; position++)
            {
                IAstNode operand = sources[position];

                if (position >= sourcesCount)
                {
                    AddDef(operand, this);
                }
                else
                {
                    AddUse(operand, this);
                }
            }
        }

        /// <summary>Same as the four-argument constructor, additionally setting <see cref="Index"/>.</summary>
        public AstOperation(Instruction inst, StorageKind storageKind, int index, IAstNode[] sources, int sourcesCount)
            : this(inst, storageKind, sources, sourcesCount)
        {
            Index = index;
        }

        /// <summary>Creates an operation without storage kind where every operand is a use.</summary>
        public AstOperation(Instruction inst, params IAstNode[] sources)
            : this(inst, StorageKind.None, sources, sources.Length)
        {
        }

        /// <summary>Returns the operand stored at <paramref name="index"/>.</summary>
        public IAstNode GetSource(int index) => _sources[index];

        /// <summary>
        /// Replaces the operand at <paramref name="index"/>, unregistering this
        /// operation from the old operand and registering it on the new one.
        /// </summary>
        public void SetSource(int index, IAstNode source)
        {
            IAstNode replaced = _sources[index];

            RemoveUse(replaced, this);
            AddUse(source, this);

            _sources[index] = source;
        }

        /// <summary>
        /// Combines <paramref name="scalarType"/> with a vector size flag chosen from
        /// the number of bits set in <see cref="Index"/>. Counts other than 2, 3 or 4
        /// return the scalar type unchanged.
        /// </summary>
        public AggregateType GetVectorType(AggregateType scalarType)
        {
            return BitOperations.PopCount((uint)Index) switch
            {
                2 => scalarType | AggregateType.Vector2,
                3 => scalarType | AggregateType.Vector3,
                4 => scalarType | AggregateType.Vector4,
                _ => scalarType
            };
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using System.Linq;

using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Clean-up passes run over the structured AST of the current function:
    /// expression propagation with removal of unused locals, followed by
    /// removal/flattening of empty or redundant blocks.
    /// </summary>
    static class AstOptimizer
    {
        /// <summary>
        /// Optimizes the main block of the context's current function.
        /// </summary>
        public static void Optimize(StructuredProgramContext context)
        {
            AstBlock mainBlock = context.CurrentFunction.MainBlock;

            // When debug mode is enabled, we disable expression propagation
            // (this makes comparison with the disassembly easier).
            bool debugMode = context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode);

            if (!debugMode)
            {
                PropagateAndPrune(context, mainBlock);
            }

            RemoveEmptyBlocks(mainBlock);
        }

        /// <summary>
        /// Visits every assignment to an operand, propagates the assigned expression
        /// into its users when allowed, and drops assignments to locals that end up unused.
        /// </summary>
        private static void PropagateAndPrune(StructuredProgramContext context, AstBlock mainBlock)
        {
            AstBlockVisitor visitor = new AstBlockVisitor(mainBlock);

            foreach (IAstNode visited in visitor.Visit())
            {
                if (!(visited is AstAssignment assignment) || !(assignment.Destination is AstOperand propVar))
                {
                    continue;
                }

                // Evaluated unconditionally, before the definition count is looked at.
                bool isWorthPropagating = propVar.Uses.Count == 1 || IsWorthPropagating(assignment.Source);

                if (propVar.Defs.Count == 1 && isWorthPropagating)
                {
                    PropagateExpression(propVar, assignment.Source);
                }

                bool isUnusedLocal = propVar.Type == OperandType.LocalVariable && propVar.Uses.Count == 0;

                if (isUnusedLocal)
                {
                    visitor.Block.Remove(assignment);

                    context.CurrentFunction.Locals.Remove(propVar);
                }
            }
        }

        /// <summary>
        /// An expression qualifies when it is a unary operation whose operand
        /// is a plain operand, or when it is a copy.
        /// </summary>
        private static bool IsWorthPropagating(IAstNode source)
        {
            return source is AstOperation srcOp
                && InstructionInfo.IsUnary(srcOp.Inst)
                && (srcOp.GetSource(0) is AstOperand || srcOp.Inst == Instruction.Copy);
        }

        /// <summary>
        /// Replaces <paramref name="propVar"/> by <paramref name="source"/> in every node
        /// currently registered as one of its users.
        /// </summary>
        private static void PropagateExpression(AstOperand propVar, IAstNode source)
        {
            // Work on a snapshot: SetSource edits the use set while we go through it.
            IAstNode[] users = propVar.Uses.ToArray();

            foreach (IAstNode user in users)
            {
                switch (user)
                {
                    case AstBlock userBlock:
                        userBlock.Condition = source;
                        break;

                    case AstOperation userOperation:
                        for (int slot = 0; slot < userOperation.SourcesCount; slot++)
                        {
                            if (userOperation.GetSource(slot) == propVar)
                            {
                                userOperation.SetSource(slot, source);
                            }
                        }
                        break;

                    case AstAssignment userAssignment:
                        userAssignment.Source = source;
                        break;
                }
            }
        }

        /// <summary>
        /// Breadth-first walk over all blocks that removes empty if/else blocks
        /// and turns an if that is alone (optionally with its else) inside an else
        /// block into an else-if placed in the enclosing block.
        /// </summary>
        private static void RemoveEmptyBlocks(AstBlock mainBlock)
        {
            Queue<AstBlock> pending = new Queue<AstBlock>();

            pending.Enqueue(mainBlock);

            while (pending.TryDequeue(out AstBlock block))
            {
                // Children are queued before the tree around this block is edited.
                foreach (IAstNode member in block)
                {
                    if (member is AstBlock nested)
                    {
                        pending.Enqueue(nested);
                    }
                }

                AstBlock parent = block.Parent;

                if (parent == null)
                {
                    continue;
                }

                AstBlock following = Next(block) as AstBlock;

                bool hasElse = following != null && following.Type == AstBlockType.Else;
                bool isIf = block.Type == AstBlockType.If;

                if (block.Count == 0)
                {
                    // An empty if followed by an else becomes "if (!cond)" on the else body.
                    if (isIf && hasElse)
                    {
                        following.TurnIntoIf(InverseCond(block.Condition));
                    }

                    if (isIf || block.Type == AstBlockType.Else)
                    {
                        parent.Remove(block);
                    }

                    continue;
                }

                if (!isIf || parent.Type != AstBlockType.Else || parent.Count != (hasElse ? 2 : 1))
                {
                    continue;
                }

                // The else block holds nothing but this if (and maybe its else):
                // hoist them next to the else and drop the now redundant else.
                AstBlock grandParent = parent.Parent;

                parent.Remove(block);

                grandParent.AddAfter(parent, block);

                if (hasElse)
                {
                    parent.Remove(following);

                    grandParent.AddAfter(block, following);
                }

                grandParent.Remove(parent);

                block.TurnIntoElseIf();
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/AstTextureOperation.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Operation node that additionally carries texture related information.
    /// </summary>
    class AstTextureOperation : AstOperation
    {
        public SamplerType Type { get; }
        public TextureFormat Format { get; }
        public TextureFlags Flags { get; }

        public int CbufSlot { get; }
        public int Handle { get; }

        /// <summary>
        /// Creates the texture operation. The base operation is built with no
        /// storage kind and with every entry of <paramref name="sources"/> treated as a use.
        /// </summary>
        public AstTextureOperation(
            Instruction inst,
            SamplerType type,
            TextureFormat format,
            TextureFlags flags,
            int cbufSlot,
            int handle,
            int index,
            params IAstNode[] sources) : base(inst, StorageKind.None, index, sources, sources.Length)
        {
            Handle = handle;
            CbufSlot = cbufSlot;
            Flags = flags;
            Format = format;
            Type = type;
        }

        /// <summary>
        /// Returns a new texture operation with the same instruction, format, flags,
        /// constant buffer slot, handle and index, but with the given sampler type.
        /// </summary>
        /// <remarks>
        /// NOTE(review): no sources are forwarded, so the returned operation has an
        /// empty source list. Presumably intended (the result looks like a descriptor
        /// rather than a node placed in the tree) - confirm against the callers.
        /// </remarks>
        public AstTextureOperation WithType(SamplerType type)
            => new AstTextureOperation(Inst, type, Format, Flags, CbufSlot, Handle, Index);
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/GotoElimination.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Rewrites goto statements of the AST into structured if / else / do-while blocks.
    /// </summary>
    static class GotoElimination
    {
        // This is a modified version of the algorithm presented on the paper
        // "Taming Control Flow: A Structured Approach to Eliminating Goto Statements".

        /// <summary>
        /// Eliminates every goto of <paramref name="gotos"/>, processing the array
        /// from its last element to its first.
        /// </summary>
        public static void Eliminate(GotoStatement[] gotos)
        {
            for (int index = gotos.Length - 1; index >= 0; index--)
            {
                EliminateSingle(gotos[index]);
            }
        }

        /// <summary>
        /// Moves one goto until it sits in the same block as its label, then replaces
        /// it by an enclosing if or do-while block and removes the goto node.
        /// </summary>
        private static void EliminateSingle(GotoStatement stmt)
        {
            AstBlock gBlock = ParentBlock(stmt.Goto);
            AstBlock lBlock = ParentBlock(stmt.Label);

            int gLevel = Level(gBlock);
            int lLevel = Level(lBlock);

            if (IndirectlyRelated(gBlock, lBlock, gLevel, lLevel))
            {
                // Climb from the goto block until an ancestor directly related
                // to the label block is found.
                AstBlock drBlock = gBlock;
                int drLevel = gLevel;

                do
                {
                    drBlock = drBlock.Parent;
                    drLevel--;
                }
                while (!DirectlyRelated(drBlock, lBlock, drLevel, lLevel));

                MoveOutward(stmt, gLevel, drLevel);

                gBlock = drBlock;
                gLevel = drLevel;

                if (Previous(stmt.Goto) is AstBlock elseBlock && elseBlock.Type == AstBlockType.Else)
                {
                    // It's possible that the label was enclosed inside an else block,
                    // in this case we need to update the block and level.
                    // We also need to set the IsLoop for the case when the label is
                    // now before the goto, due to the newly introduced else block.
                    lBlock = ParentBlock(stmt.Label);
                    lLevel = Level(lBlock);

                    if (!IndirectlyRelated(elseBlock, lBlock, gLevel + 1, lLevel))
                    {
                        stmt.IsLoop = true;
                    }
                }
            }

            if (DirectlyRelated(gBlock, lBlock, gLevel, lLevel))
            {
                if (gLevel > lLevel)
                {
                    MoveOutward(stmt, gLevel, lLevel);
                }
                else
                {
                    if (stmt.IsLoop)
                    {
                        Lift(stmt);
                    }

                    MoveInward(stmt);
                }
            }

            // The goto may have been relocated by the steps above.
            gBlock = ParentBlock(stmt.Goto);

            if (stmt.IsLoop)
            {
                EncloseDoWhile(stmt, gBlock, stmt.Label);
            }
            else
            {
                Enclose(gBlock, AstBlockType.If, stmt.Condition, Next(stmt.Goto), stmt.Label);
            }

            gBlock.Remove(stmt.Goto);
        }

        /// <summary>
        /// True when the blocks are neither the same block nor directly related.
        /// </summary>
        private static bool IndirectlyRelated(AstBlock lBlock, AstBlock rBlock, int lLevel, int rlevel)
        {
            return lBlock != rBlock && !DirectlyRelated(lBlock, rBlock, lLevel, rlevel);
        }

        /// <summary>
        /// True when the block at the deeper level has the other block among its ancestors.
        /// </summary>
        private static bool DirectlyRelated(AstBlock lBlock, AstBlock rBlock, int lLevel, int rLevel)
        {
            // If the levels are equal, they can be either siblings or indirectly related.
            if (lLevel == rLevel)
            {
                return false;
            }

            bool leftIsDeeper = lLevel > rLevel;

            IAstNode deeper = leftIsDeeper ? lBlock : rBlock;
            IAstNode shallower = leftIsDeeper ? rBlock : lBlock;

            int deeperLevel = leftIsDeeper ? lLevel : rLevel;
            int shallowerLevel = leftIsDeeper ? rLevel : lLevel;

            for (int level = deeperLevel; level >= shallowerLevel; level--)
            {
                if (deeper == shallower)
                {
                    return true;
                }

                deeper = deeper.Parent;
            }

            return false;
        }

        /// <summary>
        /// Wraps the statements between the outermost block leading to the label and
        /// the goto into a do-while, then moves the goto to the start of that loop
        /// and clears its loop flag.
        /// </summary>
        /// <exception cref="InvalidOperationException">An else block has no preceding if block.</exception>
        private static void Lift(GotoStatement stmt)
        {
            AstBlock block = ParentBlock(stmt.Goto);

            AstBlock[] path = BackwardsPath(block, ParentBlock(stmt.Label));

            AstBlock loopFirstStmt = path[path.Length - 1];

            if (loopFirstStmt.Type == AstBlockType.Else)
            {
                // The loop has to start at the if matching this else.
                loopFirstStmt = Previous(loopFirstStmt) as AstBlock;

                bool isMatchingIf = loopFirstStmt != null && loopFirstStmt.Type == AstBlockType.If;

                if (!isMatchingIf)
                {
                    throw new InvalidOperationException("Found an else without a matching if.");
                }
            }

            AstBlock newBlock = EncloseDoWhile(stmt, block, loopFirstStmt);

            block.Remove(stmt.Goto);

            newBlock.AddFirst(stmt.Goto);

            stmt.IsLoop = false;
        }

        /// <summary>
        /// Moves the goto out of its enclosing blocks until it is at level
        /// <paramref name="lLevel"/>.
        /// </summary>
        private static void MoveOutward(GotoStatement stmt, int gLevel, int lLevel)
        {
            AstBlock origin = ParentBlock(stmt.Goto);

            AstBlock block = origin;

            // Check if a loop is enclosing the goto, and the block that is
            // directly related to the label is above the loop block.
            // In that case, we need to introduce a break to get out of the loop.
            AstBlock loopBlock = origin;

            for (int loopLevel = gLevel - 1; loopLevel >= lLevel; loopLevel--)
            {
                AstBlock inner = loopBlock;

                loopBlock = loopBlock.Parent;

                if (inner.Type != AstBlockType.DoWhile)
                {
                    continue;
                }

                EncloseSingleInst(stmt, Instruction.LoopBreak);

                block.Remove(stmt.Goto);

                loopBlock.AddAfter(inner, stmt.Goto);

                block = loopBlock;
                gLevel = loopLevel;
            }

            // Insert ifs to skip the parts that shouldn't be executed due to the goto.
            bool tryInsertElse = stmt.IsUnconditional && origin.Type == AstBlockType.If;

            for (; gLevel > lLevel; gLevel--)
            {
                Enclose(block, AstBlockType.If, stmt.Condition, Next(stmt.Goto));

                block.Remove(stmt.Goto);

                AstBlock child = block;

                // We can't move the goto in the middle of a if and a else block, in
                // this case we need to move it after the else.
                // IsLoop may need to be updated if the label is inside the else, as
                // introducing a loop is the only way to ensure the else will be executed.
                if (Next(child) is AstBlock elseBlock && elseBlock.Type == AstBlockType.Else)
                {
                    child = elseBlock;
                }

                block = block.Parent;

                block.AddAfter(child, stmt.Goto);

                if (!tryInsertElse || child != origin)
                {
                    continue;
                }

                AstBlock lBlock = ParentBlock(stmt.Label);

                IAstNode last = block == lBlock && !stmt.IsLoop ? stmt.Label : null;

                AstBlock newBlock = Enclose(block, AstBlockType.Else, null, Next(stmt.Goto), last);

                if (newBlock != null)
                {
                    block.Remove(stmt.Goto);

                    block.AddAfter(newBlock, stmt.Goto);
                }
            }
        }

        /// <summary>
        /// Moves the goto into the nested blocks that lead to the label, one level at a time.
        /// </summary>
        /// <exception cref="InvalidOperationException">An else block has no preceding if block.</exception>
        private static void MoveInward(GotoStatement stmt)
        {
            AstBlock block = ParentBlock(stmt.Goto);

            AstBlock[] path = BackwardsPath(block, ParentBlock(stmt.Label));

            // The path is stored innermost first, so walk it backwards.
            for (int index = path.Length - 1; index >= 0; index--)
            {
                AstBlock child = path[index];
                AstBlock last = child;

                switch (child.Type)
                {
                    case AstBlockType.If:
                    {
                        // Modify the if condition to allow it to be entered by the goto.
                        if (!ContainsCondComb(child.Condition, Instruction.LogicalOr, stmt.Condition))
                        {
                            child.OrCondition(stmt.Condition);
                        }

                        break;
                    }

                    case AstBlockType.Else:
                    {
                        // Modify the matching if condition to force the else to be entered by the goto.
                        if (!(Previous(child) is AstBlock ifBlock) || ifBlock.Type != AstBlockType.If)
                        {
                            throw new InvalidOperationException("Found an else without a matching if.");
                        }

                        IAstNode cond = InverseCond(stmt.Condition);

                        if (!ContainsCondComb(ifBlock.Condition, Instruction.LogicalAnd, cond))
                        {
                            ifBlock.AndCondition(cond);
                        }

                        last = ifBlock;

                        break;
                    }
                }

                Enclose(block, AstBlockType.If, stmt.Condition, Next(stmt.Goto), last);

                block.Remove(stmt.Goto);

                child.AddFirst(stmt.Goto);

                block = child;
            }
        }

        /// <summary>
        /// Follows the first operand of a chain of two-operand operations and reports
        /// whether any of them is <paramref name="inst"/> with <paramref name="newCond"/>
        /// as its second operand.
        /// </summary>
        private static bool ContainsCondComb(IAstNode node, Instruction inst, IAstNode newCond)
        {
            while (node is AstOperation operation && operation.SourcesCount == 2)
            {
                bool matches = operation.Inst == inst && IsSameCond(operation.GetSource(1), newCond);

                if (matches)
                {
                    return true;
                }

                node = operation.GetSource(0);
            }

            return false;
        }

        /// <summary>
        /// Encloses the nodes from <paramref name="first"/> up to the goto in a do-while,
        /// or reuses <paramref name="block"/> when it already is a do-while starting at
        /// <paramref name="first"/>.
        /// </summary>
        /// <returns>The do-while block (or whatever <see cref="Enclose"/> returned).</returns>
        private static AstBlock EncloseDoWhile(GotoStatement stmt, AstBlock block, IAstNode first)
        {
            bool reusesLoop = block.Type == AstBlockType.DoWhile && first == block.First;

            if (!reusesLoop)
            {
                return Enclose(block, AstBlockType.DoWhile, stmt.Condition, first, stmt.Goto);
            }

            // We only need to insert the continue if we're not at the end of the loop,
            // or if our condition is different from the loop condition.
            if (Next(stmt.Goto) != null || block.Condition != stmt.Condition)
            {
                EncloseSingleInst(stmt, Instruction.LoopContinue);
            }

            // Modify the do-while condition to allow it to continue.
            if (!ContainsCondComb(block.Condition, Instruction.LogicalOr, stmt.Condition))
            {
                block.OrCondition(stmt.Condition);
            }

            return block;
        }

        /// <summary>
        /// Inserts, right after the goto, an if block on the goto condition that
        /// contains only the instruction <paramref name="inst"/>.
        /// </summary>
        private static void EncloseSingleInst(GotoStatement stmt, Instruction inst)
        {
            AstBlock guard = new AstBlock(AstBlockType.If, stmt.Condition);

            ParentBlock(stmt.Goto).AddAfter(stmt.Goto, guard);

            guard.AddFirst(new AstOperation(inst));
        }

        /// <summary>
        /// Moves the nodes from <paramref name="first"/> up to (not including)
        /// <paramref name="last"/> into a new block of the given type, inserted where
        /// <paramref name="first"/> used to be. For if blocks the condition is inverted.
        /// </summary>
        /// <returns>
        /// Null when the range is empty; the already existing block when the range is a
        /// single block with matching type and condition; otherwise the new block.
        /// </returns>
        private static AstBlock Enclose(
            AstBlock block,
            AstBlockType type,
            IAstNode cond,
            IAstNode first,
            IAstNode last = null)
        {
            if (first == last)
            {
                return null;
            }

            if (type == AstBlockType.If)
            {
                cond = InverseCond(cond);
            }

            // Do a quick check, if we are enclosing a single block,
            // and the block type/condition matches the one we're going
            // to create, then we don't need a new block, we can just
            // return the old one.
            bool hasSingleNode = Next(first) == last;

            if (hasSingleNode && BlockMatches(first, type, cond))
            {
                return first as AstBlock;
            }

            AstBlock newBlock = new AstBlock(type, cond);

            block.AddBefore(first, newBlock);

            for (IAstNode current = first; current != last;)
            {
                // Read the successor before the node is unlinked.
                IAstNode successor = Next(current);

                block.Remove(current);

                newBlock.Add(current);

                current = successor;
            }

            return newBlock;
        }

        /// <summary>
        /// True when <paramref name="node"/> is a block of the given type with the same condition.
        /// </summary>
        private static bool BlockMatches(IAstNode node, AstBlockType type, IAstNode cond)
        {
            return node is AstBlock block && block.Type == type && IsSameCond(block.Condition, cond);
        }

        /// <summary>
        /// Reference comparison of two conditions that looks through one level of
        /// logical-not when the left condition is negated.
        /// </summary>
        private static bool IsSameCond(IAstNode lCond, IAstNode rCond)
        {
            if (lCond is AstOperation lCondOp && lCondOp.Inst == Instruction.LogicalNot)
            {
                if (!(rCond is AstOperation rCondOp) || rCondOp.Inst != lCondOp.Inst)
                {
                    return false;
                }

                lCond = lCondOp.GetSource(0);
                rCond = rCondOp.GetSource(0);
            }

            return lCond == rCond;
        }

        /// <summary>
        /// Returns the parent of a block, or the closest enclosing block of any other node.
        /// </summary>
        private static AstBlock ParentBlock(IAstNode node)
        {
            if (node is AstBlock block)
            {
                return block.Parent;
            }

            do
            {
                node = node.Parent;
            }
            while (!(node is AstBlock));

            return node as AstBlock;
        }

        /// <summary>
        /// Lists the blocks from <paramref name="bottom"/> upwards through its parents,
        /// stopping before <paramref name="top"/> (innermost block first).
        /// </summary>
        private static AstBlock[] BackwardsPath(AstBlock top, AstBlock bottom)
        {
            List<AstBlock> path = new List<AstBlock>();

            for (AstBlock block = bottom; block != top; block = block.Parent)
            {
                path.Add(block);
            }

            return path.ToArray();
        }

        /// <summary>
        /// Counts the node itself plus all of its ancestors (0 for null).
        /// </summary>
        private static int Level(IAstNode node)
        {
            int level = 0;

            for (IAstNode current = node; current != null; current = current.Parent)
            {
                level++;
            }

            return level;
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/GotoStatement.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Pairs a branch operation with the label assignment it refers to.
    /// </summary>
    class GotoStatement
    {
        /// <summary>Creates the pairing of a branch operation and its label.</summary>
        /// <param name="branch">Branch operation, exposed as <see cref="Goto"/>.</param>
        /// <param name="label">Label assignment, exposed as <see cref="Label"/>.</param>
        /// <param name="isLoop">Initial value of <see cref="IsLoop"/>.</param>
        public GotoStatement(AstOperation branch, AstAssignment label, bool isLoop)
        {
            IsLoop = isLoop;
            Label = label;
            Goto = branch;
        }

        public AstOperation Goto { get; }
        public AstAssignment Label { get; }

        /// <summary>Loop flag; may be changed after construction.</summary>
        public bool IsLoop { get; set; }

        /// <summary>The destination of the label assignment serves as the goto condition.</summary>
        public IAstNode Condition => Label.Destination;

        /// <summary>True when the goto instruction is a plain <see cref="Instruction.Branch"/>.</summary>
        public bool IsUnconditional => Goto.Inst == Instruction.Branch;
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
using System;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Bit mask with one bit per entry (bits 0 to 11), so values can be combined.
    /// </summary>
    [Flags]
    enum HelperFunctionsMask
    {
        AtomicMinMaxS32Shared  = 0x001,
        AtomicMinMaxS32Storage = 0x002,
        MultiplyHighS32        = 0x004,
        MultiplyHighU32        = 0x008,
        Shuffle                = 0x010,
        ShuffleDown            = 0x020,
        ShuffleUp              = 0x040,
        ShuffleXor             = 0x080,
        StoreSharedSmallInt    = 0x100,
        StoreStorageSmallInt   = 0x200,
        SwizzleAdd             = 0x400,
        FSI                    = 0x800
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/IAstNode.cs
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Contract shared by all nodes of the structured AST: every node knows its
    /// parent block and the linked list node that holds it.
    /// </summary>
    interface IAstNode
    {
        /// <summary>Linked list node associated with this AST node.</summary>
        LinkedListNode<IAstNode> LLNode { get; set; }

        /// <summary>Block recorded as the parent of this node.</summary>
        AstBlock Parent { get; set; }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Static table giving, for each registered instruction, the type of its
    /// result and the types of its sources.
    /// </summary>
    static class InstructionInfo
    {
        /// <summary>
        /// Table entry: destination type plus one type per source.
        /// </summary>
        private readonly struct InstInfo
        {
            public AggregateType DestType { get; }

            public AggregateType[] SrcTypes { get; }

            public InstInfo(AggregateType destType, params AggregateType[] srcTypes)
            {
                DestType = destType;
                SrcTypes = srcTypes;
            }
        }

        // Indexed by the instruction value (with modifier bits masked off).
        // Instructions that are never registered keep the default entry.
        private static readonly InstInfo[] _infoTbl;

        static InstructionInfo()
        {
            _infoTbl = new InstInfo[(int)Instruction.Count];

            // Inst, Destination type, Source 1 type, Source 2 type, Source 3 type, Source 4 type
            Add(Instruction.AtomicAdd, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.AtomicAnd, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.AtomicCompareAndSwap, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32, AggregateType.U32);
            Add(Instruction.AtomicMaxS32, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.AtomicMaxU32, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.AtomicMinS32, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.AtomicMinU32, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.AtomicOr, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.AtomicSwap, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.AtomicXor, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.Absolute, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.Add, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.Ballot, AggregateType.U32, AggregateType.Bool);
            Add(Instruction.BitCount, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BitfieldExtractS32, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BitfieldExtractU32, AggregateType.U32, AggregateType.U32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BitfieldInsert, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BitfieldReverse, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BitwiseAnd, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BitwiseExclusiveOr, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BitwiseNot, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BitwiseOr, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.BranchIfTrue, AggregateType.Void, AggregateType.Bool);
            Add(Instruction.BranchIfFalse, AggregateType.Void, AggregateType.Bool);
            Add(Instruction.Call, AggregateType.Scalar);
            // Ceiling takes a single operand, like Floor, Round and Truncate.
            // It used to be registered with two source types, which made IsUnary
            // report false for it.
            Add(Instruction.Ceiling, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.Clamp, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.ClampU32, AggregateType.U32, AggregateType.U32, AggregateType.U32, AggregateType.U32);
            Add(Instruction.CompareEqual, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.CompareGreater, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.CompareGreaterOrEqual, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.CompareGreaterOrEqualU32, AggregateType.Bool, AggregateType.U32, AggregateType.U32);
            Add(Instruction.CompareGreaterU32, AggregateType.Bool, AggregateType.U32, AggregateType.U32);
            Add(Instruction.CompareLess, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.CompareLessOrEqual, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.CompareLessOrEqualU32, AggregateType.Bool, AggregateType.U32, AggregateType.U32);
            Add(Instruction.CompareLessU32, AggregateType.Bool, AggregateType.U32, AggregateType.U32);
            Add(Instruction.CompareNotEqual, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.ConditionalSelect, AggregateType.Scalar, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.ConvertFP32ToFP64, AggregateType.FP64, AggregateType.FP32);
            Add(Instruction.ConvertFP64ToFP32, AggregateType.FP32, AggregateType.FP64);
            Add(Instruction.ConvertFP32ToS32, AggregateType.S32, AggregateType.FP32);
            Add(Instruction.ConvertFP32ToU32, AggregateType.U32, AggregateType.FP32);
            Add(Instruction.ConvertFP64ToS32, AggregateType.S32, AggregateType.FP64);
            Add(Instruction.ConvertFP64ToU32, AggregateType.U32, AggregateType.FP64);
            Add(Instruction.ConvertS32ToFP32, AggregateType.FP32, AggregateType.S32);
            Add(Instruction.ConvertS32ToFP64, AggregateType.FP64, AggregateType.S32);
            Add(Instruction.ConvertU32ToFP32, AggregateType.FP32, AggregateType.U32);
            Add(Instruction.ConvertU32ToFP64, AggregateType.FP64, AggregateType.U32);
            Add(Instruction.Cosine, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.Ddx, AggregateType.FP32, AggregateType.FP32);
            Add(Instruction.Ddy, AggregateType.FP32, AggregateType.FP32);
            Add(Instruction.Divide, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.ExponentB2, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.FindLSB, AggregateType.S32, AggregateType.S32);
            Add(Instruction.FindMSBS32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.FindMSBU32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.Floor, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.FusedMultiplyAdd, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.ImageLoad, AggregateType.FP32);
            Add(Instruction.ImageStore, AggregateType.Void);
            Add(Instruction.ImageAtomic, AggregateType.S32);
            Add(Instruction.IsNan, AggregateType.Bool, AggregateType.Scalar);
            Add(Instruction.Load, AggregateType.FP32);
            Add(Instruction.LoadConstant, AggregateType.FP32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.LoadGlobal, AggregateType.U32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.LoadLocal, AggregateType.U32, AggregateType.S32);
            Add(Instruction.LoadShared, AggregateType.U32, AggregateType.S32);
            Add(Instruction.LoadStorage, AggregateType.U32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.Lod, AggregateType.FP32);
            Add(Instruction.LogarithmB2, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.LogicalAnd, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool);
            Add(Instruction.LogicalExclusiveOr, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool);
            Add(Instruction.LogicalNot, AggregateType.Bool, AggregateType.Bool);
            Add(Instruction.LogicalOr, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool);
            Add(Instruction.Maximum, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.MaximumU32, AggregateType.U32, AggregateType.U32, AggregateType.U32);
            Add(Instruction.Minimum, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.MinimumU32, AggregateType.U32, AggregateType.U32, AggregateType.U32);
            Add(Instruction.Multiply, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.MultiplyHighS32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.MultiplyHighU32, AggregateType.U32, AggregateType.U32, AggregateType.U32);
            Add(Instruction.Negate, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.PackDouble2x32, AggregateType.FP64, AggregateType.U32, AggregateType.U32);
            Add(Instruction.PackHalf2x16, AggregateType.U32, AggregateType.FP32, AggregateType.FP32);
            Add(Instruction.ReciprocalSquareRoot, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.Round, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.ShiftLeft, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.ShiftRightS32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.ShiftRightU32, AggregateType.U32, AggregateType.U32, AggregateType.S32);
            Add(Instruction.Shuffle, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
            Add(Instruction.ShuffleDown, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
            Add(Instruction.ShuffleUp, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
            Add(Instruction.ShuffleXor, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
            Add(Instruction.Sine, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.SquareRoot, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.Store, AggregateType.Void);
            Add(Instruction.StoreGlobal, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.StoreLocal, AggregateType.Void, AggregateType.S32, AggregateType.U32);
            Add(Instruction.StoreShared, AggregateType.Void, AggregateType.S32, AggregateType.U32);
            Add(Instruction.StoreShared16, AggregateType.Void, AggregateType.S32, AggregateType.U32);
            Add(Instruction.StoreShared8, AggregateType.Void, AggregateType.S32, AggregateType.U32);
            Add(Instruction.StoreStorage, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.StoreStorage16, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.StoreStorage8, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32);
            Add(Instruction.Subtract, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.SwizzleAdd, AggregateType.FP32, AggregateType.FP32, AggregateType.FP32, AggregateType.S32);
            Add(Instruction.TextureSample, AggregateType.FP32);
            Add(Instruction.TextureSize, AggregateType.S32, AggregateType.S32, AggregateType.S32);
            Add(Instruction.Truncate, AggregateType.Scalar, AggregateType.Scalar);
            Add(Instruction.UnpackDouble2x32, AggregateType.U32, AggregateType.FP64);
            Add(Instruction.UnpackHalf2x16, AggregateType.FP32, AggregateType.U32);
            Add(Instruction.VectorExtract, AggregateType.Scalar, AggregateType.Vector4, AggregateType.S32);
            Add(Instruction.VoteAll, AggregateType.Bool, AggregateType.Bool);
            Add(Instruction.VoteAllEqual, AggregateType.Bool, AggregateType.Bool);
            Add(Instruction.VoteAny, AggregateType.Bool, AggregateType.Bool);
        }

        /// <summary>
        /// Registers the destination and source types of <paramref name="inst"/>.
        /// </summary>
        private static void Add(Instruction inst, AggregateType destType, params AggregateType[] srcTypes)
        {
            _infoTbl[(int)inst] = new InstInfo(destType, srcTypes);
        }

        /// <summary>
        /// Returns the result type of <paramref name="inst"/>, with the generic scalar
        /// type resolved from the instruction's FP32/FP64 modifier bits.
        /// </summary>
        /// <exception cref="ArgumentException">The registered destination type is void.</exception>
        public static AggregateType GetDestVarType(Instruction inst)
        {
            return GetFinalVarType(_infoTbl[(int)(inst & Instruction.Mask)].DestType, inst);
        }

        /// <summary>
        /// Returns the type of source <paramref name="index"/> of <paramref name="inst"/>.
        /// A handful of instructions are answered with a fixed type regardless of the index.
        /// </summary>
        /// <exception cref="ArgumentException">The registered source type is void.</exception>
        public static AggregateType GetSrcVarType(Instruction inst, int index)
        {
            // TODO: Return correct type depending on source index,
            // that can improve the decompiler output.
            if (inst == Instruction.ImageLoad ||
                inst == Instruction.ImageStore ||
                inst == Instruction.ImageAtomic ||
                inst == Instruction.Lod ||
                inst == Instruction.TextureSample)
            {
                return AggregateType.FP32;
            }
            else if (inst == Instruction.Call || inst == Instruction.Load || inst == Instruction.Store)
            {
                return AggregateType.S32;
            }

            return GetFinalVarType(_infoTbl[(int)(inst & Instruction.Mask)].SrcTypes[index], inst);
        }

        /// <summary>
        /// Resolves the generic scalar type to FP32, FP64 or S32 depending on the
        /// modifier bits of <paramref name="inst"/>; other types are returned as is.
        /// </summary>
        /// <exception cref="ArgumentException"><paramref name="type"/> is void.</exception>
        private static AggregateType GetFinalVarType(AggregateType type, Instruction inst)
        {
            if (type == AggregateType.Scalar)
            {
                if ((inst & Instruction.FP32) != 0)
                {
                    return AggregateType.FP32;
                }
                else if ((inst & Instruction.FP64) != 0)
                {
                    return AggregateType.FP64;
                }
                else
                {
                    return AggregateType.S32;
                }
            }
            else if (type == AggregateType.Void)
            {
                throw new ArgumentException($"Invalid operand for instruction \"{inst}\".");
            }

            return type;
        }

        /// <summary>
        /// True for <see cref="Instruction.Copy"/> and for every instruction registered
        /// with exactly one source; always false for <see cref="Instruction.TextureSample"/>.
        /// </summary>
        public static bool IsUnary(Instruction inst)
        {
            if (inst == Instruction.Copy)
            {
                return true;
            }
            else if (inst == Instruction.TextureSample)
            {
                return false;
            }

            return _infoTbl[(int)(inst & Instruction.Mask)].SrcTypes.Length == 1;
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/IoDefinition.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Immutable value describing one shader input/output definition:
    /// its storage kind, I/O variable, location and component.
    /// </summary>
    readonly struct IoDefinition : IEquatable<IoDefinition>
    {
        public StorageKind StorageKind { get; }
        public IoVariable IoVariable { get; }
        public int Location { get; }
        public int Component { get; }

        /// <summary>
        /// Creates a new definition. Location and component default to zero.
        /// </summary>
        public IoDefinition(StorageKind storageKind, IoVariable ioVariable, int location = 0, int component = 0)
        {
            StorageKind = storageKind;
            IoVariable = ioVariable;
            Location = location;
            Component = component;
        }

        /// <summary>
        /// Boxed comparison; only another <see cref="IoDefinition"/> with identical members is equal.
        /// </summary>
        public override bool Equals(object other)
        {
            if (other is IoDefinition definition)
            {
                return Equals(definition);
            }

            return false;
        }

        /// <summary>
        /// Member-wise comparison of all four values.
        /// </summary>
        public bool Equals(IoDefinition other)
        {
            if (StorageKind != other.StorageKind || IoVariable != other.IoVariable)
            {
                return false;
            }

            return Location == other.Location && Component == other.Component;
        }

        /// <summary>
        /// Combines the four members by OR-ing them at bit offsets 0, 8, 16 and 24.
        /// </summary>
        public override int GetHashCode()
        {
            int hash = (int)StorageKind;

            hash |= (int)IoVariable << 8;
            hash |= Location << 16;
            hash |= Component << 24;

            return hash;
        }

        public override string ToString() => $"{StorageKind}.{IoVariable}.{Location}.{Component}";
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/OperandInfo.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Helpers that map operands to the variable type used for them.
    /// </summary>
    static class OperandInfo
    {
        /// <summary>
        /// Returns the operand's own variable type for local variables,
        /// otherwise the fixed type associated with its operand type.
        /// </summary>
        public static AggregateType GetVarType(AstOperand operand)
        {
            return operand.Type == OperandType.LocalVariable
                ? operand.VarType
                : GetVarType(operand.Type);
        }

        /// <summary>
        /// Returns the fixed variable type for a non-local operand type.
        /// </summary>
        /// <exception cref="ArgumentException">The operand type has no fixed variable type.</exception>
        public static AggregateType GetVarType(OperandType type)
        {
            switch (type)
            {
                case OperandType.Argument:
                case OperandType.Constant:
                case OperandType.Undefined:
                    return AggregateType.S32;

                case OperandType.ConstantBuffer:
                    return AggregateType.FP32;

                default:
                    throw new ArgumentException($"Invalid operand type \"{type}\".");
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    static class PhiFunctions
    {
        /// <summary>
        /// Removes every phi node from the given blocks. For each phi source, a copy of that
        /// source into the phi destination is appended to the block the source comes from.
        /// </summary>
        public static void Remove(BasicBlock[] blocks)
        {
            foreach (BasicBlock block in blocks)
            {
                LinkedListNode<INode> current = block.Operations.First;

                while (current != null)
                {
                    // Grab the successor first: the current node may be unlinked below.
                    LinkedListNode<INode> following = current.Next;

                    if (current.Value is PhiNode phi)
                    {
                        for (int srcIndex = 0; srcIndex < phi.SourcesCount; srcIndex++)
                        {
                            Operand source = phi.GetSource(srcIndex);
                            BasicBlock sourceBlock = phi.GetBlock(srcIndex);

                            sourceBlock.Append(new Operation(Instruction.Copy, phi.Dest, source));
                        }

                        block.Operations.Remove(current);
                    }

                    current = following;
                }
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/StructuredFunction.cs
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// A function in structured form: its main AST block, signature and local variables.
    /// </summary>
    class StructuredFunction
    {
        public AstBlock MainBlock { get; }

        public string Name { get; }

        public AggregateType ReturnType { get; }

        public AggregateType[] InArguments { get; }
        public AggregateType[] OutArguments { get; }

        // Starts out empty; populated by users of this class.
        public HashSet<AstOperand> Locals { get; } = new HashSet<AstOperand>();

        public StructuredFunction(
            AstBlock mainBlock,
            string name,
            AggregateType returnType,
            AggregateType[] inArguments,
            AggregateType[] outArguments)
        {
            MainBlock = mainBlock;
            Name = name;
            ReturnType = returnType;
            InArguments = inArguments;
            OutArguments = outArguments;
        }

        /// <summary>
        /// Gets the type of the argument at <paramref name="index"/>, where input arguments
        /// come first and output arguments follow them.
        /// </summary>
        public AggregateType GetArgumentType(int index)
        {
            if (index < InArguments.Length)
            {
                return InArguments[index];
            }

            return OutArguments[index - InArguments.Length];
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Numerics;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Converts functions made of basic blocks into the structured (AST based) representation.
    /// </summary>
    static class StructuredProgram
    {
        /// <summary>
        /// Builds the structured program information for all the supplied functions.
        /// </summary>
        /// <param name="functions">Functions to convert, each one made of basic blocks</param>
        /// <param name="config">Shader configuration used by the conversion context</param>
        /// <returns>The information accumulated on the conversion context</returns>
        public static StructuredProgramInfo MakeStructuredProgram(Function[] functions, ShaderConfig config)
        {
            StructuredProgramContext context = new StructuredProgramContext(config);

            for (int funcIndex = 0; funcIndex < functions.Length; funcIndex++)
            {
                Function function = functions[funcIndex];

                BasicBlock[] blocks = function.Blocks;

                AggregateType returnType = function.ReturnsValue ? AggregateType.S32 : AggregateType.Void;

                // All function arguments are currently typed as S32.
                AggregateType[] inArguments = new AggregateType[function.InArgumentsCount];
                AggregateType[] outArguments = new AggregateType[function.OutArgumentsCount];

                Array.Fill(inArguments, AggregateType.S32);
                Array.Fill(outArguments, AggregateType.S32);

                context.EnterFunction(blocks.Length, function.Name, returnType, inArguments, outArguments);

                // Phi nodes are replaced by copies before the operations are visited,
                // so every node visited below is expected to be an Operation.
                PhiFunctions.Remove(blocks);

                for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
                {
                    BasicBlock block = blocks[blkIndex];

                    context.EnterBlock(block);

                    for (LinkedListNode<INode> opNode = block.Operations.First; opNode != null; opNode = opNode.Next)
                    {
                        Operation operation = (Operation)opNode.Value;

                        if (IsBranchInst(operation.Inst))
                        {
                            context.LeaveBlock(block, operation);
                        }
                        else
                        {
                            AddOperation(context, operation);
                        }
                    }
                }

                GotoElimination.Eliminate(context.GetGotos());

                AstOptimizer.Optimize(context);

                context.LeaveFunction();
            }

            return context.Info;
        }

        /// <summary>
        /// Adds the AST node(s) equivalent to a non-branch operation to the current block,
        /// recording any input/output definition and helper function requirement it implies.
        /// </summary>
        /// <param name="context">Conversion context receiving the new nodes</param>
        /// <param name="operation">Operation to convert</param>
        private static void AddOperation(StructuredProgramContext context, Operation operation)
        {
            Instruction inst = operation.Inst;
            StorageKind storageKind = operation.StorageKind;

            // Loads and stores on inputs/outputs register the I/O definition they access.
            if ((inst == Instruction.Load || inst == Instruction.Store) && storageKind.IsInputOrOutput())
            {
                IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value;
                bool isOutput = storageKind.IsOutput();
                int location = 0;
                int component = 0;

                if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
                {
                    location = operation.GetSource(1).Value;

                    if (operation.SourcesCount > 2)
                    {
                        Operand componentSource = operation.GetSource(2);

                        // The component is only recorded when it is a constant and the
                        // configuration reports a per-component definition for it.
                        if (componentSource.Type == OperandType.Constant &&
                            context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, componentSource.Value, isOutput))
                        {
                            component = componentSource.Value;
                        }
                    }
                }

                context.Info.IoDefinitions.Add(new IoDefinition(storageKind, ioVariable, location, component));
            }

            bool vectorDest = IsVectorDestInst(inst);

            // For non-vector instructions, destinations past the first one are passed
            // as extra trailing sources of the AST operation.
            int sourcesCount = operation.SourcesCount;
            int outDestsCount = operation.DestsCount != 0 && !vectorDest ? operation.DestsCount - 1 : 0;

            IAstNode[] sources = new IAstNode[sourcesCount + outDestsCount];

            for (int index = 0; index < operation.SourcesCount; index++)
            {
                sources[index] = context.GetOperand(operation.GetSource(index));
            }

            for (int index = 0; index < outDestsCount; index++)
            {
                AstOperand oper = context.GetOperand(operation.GetDest(1 + index));

                oper.VarType = InstructionInfo.GetSrcVarType(inst, sourcesCount + index);

                sources[sourcesCount + index] = oper;
            }

            // Note: "inst" is captured by reference, so this uses its value at call time.
            AstTextureOperation GetAstTextureOperation(TextureOperation texOp)
            {
                return new AstTextureOperation(
                    inst,
                    texOp.Type,
                    texOp.Format,
                    texOp.Flags,
                    texOp.CbufSlot,
                    texOp.Handle,
                    texOp.Index,
                    sources);
            }

            // The number of bits set on the operation index gives the component count.
            int componentsCount = BitOperations.PopCount((uint)operation.Index);

            if (vectorDest && componentsCount > 1)
            {
                AggregateType destType = InstructionInfo.GetDestVarType(inst);

                IAstNode source;

                if (operation is TextureOperation texOp)
                {
                    if (texOp.Inst == Instruction.ImageLoad)
                    {
                        destType = texOp.Format.GetComponentType();
                    }

                    source = GetAstTextureOperation(texOp);
                }
                else
                {
                    source = new AstOperation(inst, operation.StorageKind, operation.Index, sources, operation.SourcesCount);
                }

                AggregateType destElemType = destType;

                switch (componentsCount)
                {
                    case 2: destType |= AggregateType.Vector2; break;
                    case 3: destType |= AggregateType.Vector3; break;
                    case 4: destType |= AggregateType.Vector4; break;
                }

                // The vector result goes to a temporary, and each destination
                // then receives one element extracted from it.
                AstOperand destVec = context.NewTemp(destType);

                context.AddNode(new AstAssignment(destVec, source));

                for (int i = 0; i < operation.DestsCount; i++)
                {
                    AstOperand dest = context.GetOperand(operation.GetDest(i));
                    AstOperand index = new AstOperand(OperandType.Constant, i);

                    dest.VarType = destElemType;

                    context.AddNode(new AstAssignment(dest, new AstOperation(Instruction.VectorExtract, StorageKind.None, new[] { destVec, index }, 2)));
                }
            }
            else if (operation.Dest != null)
            {
                AstOperand dest = context.GetOperand(operation.Dest);

                // If all the sources are bool, it's better to use short-circuiting
                // logical operations, rather than forcing a cast to int and doing
                // a bitwise operation with the value, as it is likely to be used as
                // a bool in the end.
                if (IsBitwiseInst(inst) && AreAllSourceTypesEqual(sources, AggregateType.Bool))
                {
                    inst = GetLogicalFromBitwiseInst(inst);
                }

                bool isCondSel = inst == Instruction.ConditionalSelect;
                bool isCopy = inst == Instruction.Copy;

                if (isCondSel || isCopy)
                {
                    // These have no type of their own, so the type is taken from the uses.
                    AggregateType type = GetVarTypeFromUses(operation.Dest);

                    if (isCondSel && type == AggregateType.FP32)
                    {
                        inst |= Instruction.FP32;
                    }

                    dest.VarType = type;
                }
                else
                {
                    dest.VarType = InstructionInfo.GetDestVarType(inst);
                }

                IAstNode source;

                if (operation is TextureOperation texOp)
                {
                    if (texOp.Inst == Instruction.ImageLoad)
                    {
                        dest.VarType = texOp.Format.GetComponentType();
                    }

                    source = GetAstTextureOperation(texOp);
                }
                else if (!isCopy)
                {
                    source = new AstOperation(inst, operation.StorageKind, operation.Index, sources, operation.SourcesCount);
                }
                else
                {
                    // A copy is a plain assignment from its single source.
                    source = sources[0];
                }

                context.AddNode(new AstAssignment(dest, source));
            }
            else if (operation.Inst == Instruction.Comment)
            {
                context.AddNode(new AstComment(((CommentNode)operation).Comment));
            }
            else if (operation is TextureOperation texOp)
            {
                AstTextureOperation astTexOp = GetAstTextureOperation(texOp);

                context.AddNode(astTexOp);
            }
            else
            {
                context.AddNode(new AstOperation(inst, operation.StorageKind, operation.Index, sources, operation.SourcesCount));
            }

            // These instructions need to be emulated by using helper functions,
            // because they are NVIDIA specific. These flags help the backend to
            // decide which helper functions are needed on the final generated code.
            switch (operation.Inst)
            {
                case Instruction.AtomicMaxS32:
                case Instruction.AtomicMinS32:
                    if (operation.StorageKind == StorageKind.SharedMemory)
                    {
                        context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared;
                    }
                    else if (operation.StorageKind == StorageKind.StorageBuffer)
                    {
                        context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Storage;
                    }
                    break;
                case Instruction.MultiplyHighS32:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32;
                    break;
                case Instruction.MultiplyHighU32:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighU32;
                    break;
                case Instruction.Shuffle:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle;
                    break;
                case Instruction.ShuffleDown:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleDown;
                    break;
                case Instruction.ShuffleUp:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleUp;
                    break;
                case Instruction.ShuffleXor:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor;
                    break;
                case Instruction.StoreShared16:
                case Instruction.StoreShared8:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreSharedSmallInt;
                    break;
                case Instruction.StoreStorage16:
                case Instruction.StoreStorage8:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreStorageSmallInt;
                    break;
                case Instruction.SwizzleAdd:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
                    break;
                case Instruction.FSIBegin:
                case Instruction.FSIEnd:
                    context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
                    break;
            }
        }

        /// <summary>
        /// Infers the type of an operand from the operations that use it, following copies
        /// into other local variables. Falls back to S32 when no use determines the type.
        /// </summary>
        /// <param name="dest">Operand whose type should be inferred</param>
        /// <returns>The inferred variable type</returns>
        private static AggregateType GetVarTypeFromUses(Operand dest)
        {
            HashSet<Operand> visited = new HashSet<Operand>();

            Queue<Operand> pending = new Queue<Operand>();

            // Queues the operand only the first time it is seen.
            bool Enqueue(Operand operand)
            {
                if (visited.Add(operand))
                {
                    pending.Enqueue(operand);

                    return true;
                }

                return false;
            }

            Enqueue(dest);

            while (pending.TryDequeue(out Operand operand))
            {
                foreach (INode useNode in operand.UseOps)
                {
                    if (useNode is not Operation operation)
                    {
                        continue;
                    }

                    if (operation.Inst == Instruction.Copy)
                    {
                        if (operation.Dest.Type == OperandType.LocalVariable)
                        {
                            // Once a new copy destination is queued, the remaining
                            // uses of the current operand are not inspected.
                            if (Enqueue(operation.Dest))
                            {
                                break;
                            }
                        }
                        else
                        {
                            return OperandInfo.GetVarType(operation.Dest.Type);
                        }
                    }
                    else
                    {
                        for (int index = 0; index < operation.SourcesCount; index++)
                        {
                            if (operation.GetSource(index) == operand)
                            {
                                return InstructionInfo.GetSrcVarType(operation.Inst, index);
                            }
                        }
                    }
                }
            }

            return AggregateType.S32;
        }

        /// <summary>
        /// Checks that every source is an operand whose variable type is <paramref name="type"/>.
        /// </summary>
        private static bool AreAllSourceTypesEqual(IAstNode[] sources, AggregateType type)
        {
            foreach (IAstNode node in sources)
            {
                if (node is not AstOperand operand)
                {
                    return false;
                }

                if (operand.VarType != type)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Checks if the instruction is one of those handled as producing a vector destination.
        /// </summary>
        private static bool IsVectorDestInst(Instruction inst)
        {
            return inst switch
            {
                Instruction.ImageLoad or
                Instruction.TextureSample => true,
                _ => false
            };
        }

        /// <summary>
        /// Checks if the instruction is a conditional or unconditional branch.
        /// </summary>
        private static bool IsBranchInst(Instruction inst)
        {
            return inst switch
            {
                Instruction.Branch or
                Instruction.BranchIfFalse or
                Instruction.BranchIfTrue => true,
                _ => false
            };
        }

        /// <summary>
        /// Checks if the instruction is a bitwise one with a logical equivalent.
        /// </summary>
        private static bool IsBitwiseInst(Instruction inst)
        {
            return inst switch
            {
                Instruction.BitwiseAnd or
                Instruction.BitwiseExclusiveOr or
                Instruction.BitwiseNot or
                Instruction.BitwiseOr => true,
                _ => false
            };
        }

        /// <summary>
        /// Gets the logical instruction equivalent to a bitwise instruction.
        /// </summary>
        /// <exception cref="ArgumentException">The instruction is not a supported bitwise instruction.</exception>
        private static Instruction GetLogicalFromBitwiseInst(Instruction inst)
        {
            return inst switch
            {
                Instruction.BitwiseAnd => Instruction.LogicalAnd,
                Instruction.BitwiseExclusiveOr => Instruction.LogicalExclusiveOr,
                Instruction.BitwiseNot => Instruction.LogicalNot,
                Instruction.BitwiseOr => Instruction.LogicalOr,
                _ => throw new ArgumentException($"Unexpected instruction \"{inst}\".")
            };
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;

using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// State used while converting the basic blocks of a function into nested AST blocks.
    /// </summary>
    class StructuredProgramContext
    {
        private HashSet<BasicBlock> _loopTails;

        private Stack<(AstBlock Block, int CurrEndIndex, int LoopEndIndex)> _blockStack;

        private Dictionary<Operand, AstOperand> _localsMap;

        private Dictionary<int, AstAssignment> _gotoTempAsgs;

        private List<GotoStatement> _gotos;

        private AstBlock _currBlock;

        private int _currEndIndex;
        private int _loopEndIndex;

        public StructuredFunction CurrentFunction { get; private set; }

        public StructuredProgramInfo Info { get; }

        public ShaderConfig Config { get; }

        /// <summary>
        /// Creates the context and registers the input definitions implied by the configuration.
        /// </summary>
        public StructuredProgramContext(ShaderConfig config)
        {
            Info = new StructuredProgramInfo();

            Config = config;

            if (config.GpPassthrough)
            {
                // One user defined input per bit set on the passthrough attributes mask.
                int remainingMask = config.PassthroughAttributes;

                while (remainingMask != 0)
                {
                    int location = BitOperations.TrailingZeroCount(remainingMask);

                    Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.UserDefined, location));

                    remainingMask &= ~(1 << location);
                }

                Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.Position));
                Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.PointSize));
                Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.ClipDistance));
            }
            else if (config.Stage == ShaderStage.Fragment)
            {
                // Potentially used for texture coordinate scaling.
                Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.FragmentCoord));
            }
        }

        /// <summary>
        /// Resets all per-function state and starts a new function with an empty main block.
        /// </summary>
        public void EnterFunction(
            int blocksCount,
            string name,
            AggregateType returnType,
            AggregateType[] inArguments,
            AggregateType[] outArguments)
        {
            _currEndIndex = blocksCount;
            _loopEndIndex = blocksCount;

            _gotos = new List<GotoStatement>();
            _gotoTempAsgs = new Dictionary<int, AstAssignment>();
            _localsMap = new Dictionary<Operand, AstOperand>();
            _blockStack = new Stack<(AstBlock, int, int)>();
            _loopTails = new HashSet<BasicBlock>();

            AstBlock mainBlock = new AstBlock(AstBlockType.Main);

            _currBlock = mainBlock;

            CurrentFunction = new StructuredFunction(mainBlock, name, returnType, inArguments, outArguments);
        }

        /// <summary>
        /// Adds the current function to the program information.
        /// </summary>
        public void LeaveFunction()
        {
            Info.Functions.Add(CurrentFunction);
        }

        /// <summary>
        /// Called at the start of each basic block: closes every AST block ending here, emits
        /// the pending goto temporary reset for this block, and opens do-while loops if possible.
        /// </summary>
        public void EnterBlock(BasicBlock block)
        {
            while (block.Index == _currEndIndex)
            {
                (_currBlock, _currEndIndex, _loopEndIndex) = _blockStack.Pop();
            }

            if (_gotoTempAsgs.TryGetValue(block.Index, out AstAssignment pendingReset))
            {
                AddGotoTempReset(block, pendingReset);
            }

            LookForDoWhileStatements(block);
        }

        /// <summary>
        /// Called when the branch operation of a basic block is reached.
        /// </summary>
        public void LeaveBlock(BasicBlock block, Operation branchOp)
        {
            LookForIfStatements(block, branchOp);
        }

        private void LookForDoWhileStatements(BasicBlock block)
        {
            // Check if we have any predecessor whose index is greater than the
            // current block, this indicates a loop.
            bool loopCreated = false;

            foreach (BasicBlock pred in block.Predecessors.OrderByDescending(p => p.Index))
            {
                // If not a loop, break.
                if (pred.Index < block.Index)
                {
                    break;
                }

                // Check if we can create a do-while loop here (only possible if the loop end
                // falls inside the current scope), if not add a goto instead.
                bool canEnclose = pred.Index < _currEndIndex && !loopCreated;

                if (!canEnclose)
                {
                    // Failed to create loop. Since this block is the loop head, we reset the
                    // goto condition variable here. The variable is always reset on the jump
                    // target, and this block is the jump target for some loop.
                    AddGotoTempReset(block, GetGotoTempAsg(block.Index));

                    break;
                }

                // Create do-while loop block. We must avoid inserting a goto at the end
                // of the loop later, when the tail block is processed. So we add the predecessor
                // to a list of loop tails to prevent it from being processed later.
                Operation tailBranch = (Operation)pred.GetLastOp();

                NewBlock(AstBlockType.DoWhile, tailBranch, pred.Index + 1);

                _loopTails.Add(pred);

                loopCreated = true;
            }
        }

        private void LookForIfStatements(BasicBlock block, Operation branchOp)
        {
            if (block.Branch == null)
            {
                return;
            }

            // We can only enclose the "if" when the branch lands before
            // the end of the current block. If the current enclosing block
            // is not a loop, then we can also do so if the branch lands
            // right at the end of the current block. When it is a loop,
            // this is not valid as the loop condition would be evaluated,
            // and it could erroneously jump back to the start of the loop.
            bool inRange =
                block.Branch.Index < _currEndIndex ||
                (block.Branch.Index == _currEndIndex && block.Branch.Index < _loopEndIndex);

            bool isLoop = block.Branch.Index <= block.Index;

            if (inRange && !isLoop)
            {
                NewBlock(AstBlockType.If, branchOp, block.Branch.Index);

                return;
            }

            if (_loopTails.Contains(block))
            {
                return;
            }

            AstAssignment targetTempAsg = GetGotoTempAsg(block.Branch.Index);

            // We use DoWhile type here, as the condition should be true for
            // unconditional branches, or it should jump if the condition is true otherwise.
            IAstNode jumpCond = GetBranchCond(AstBlockType.DoWhile, branchOp);

            AddNode(Assign(targetTempAsg.Destination, jumpCond));

            AstOperation branchNode = new AstOperation(branchOp.Inst);

            AddNode(branchNode);

            _gotos.Add(new GotoStatement(branchNode, targetTempAsg, isLoop));
        }

        // Gets the "goto temporary = false" assignment for a target block index,
        // creating (and caching) a new bool temporary on first request.
        private AstAssignment GetGotoTempAsg(int index)
        {
            if (!_gotoTempAsgs.TryGetValue(index, out AstAssignment asg))
            {
                AstOperand gotoTemp = NewTemp(AggregateType.Bool);

                asg = Assign(gotoTemp, Const(IrConsts.False));

                _gotoTempAsgs.Add(index, asg);
            }

            return asg;
        }

        private void AddGotoTempReset(BasicBlock block, AstAssignment gotoTempAsg)
        {
            // If it was already added, we don't need to add it again.
            if (gotoTempAsg.Parent == null)
            {
                AddNode(gotoTempAsg);

                // For block 0, we don't need to add the extra "reset" at the beginning,
                // because it is already the first node to be executed on the shader,
                // so it is reset to false by the "local" assignment anyway.
                if (block.Index != 0)
                {
                    CurrentFunction.MainBlock.AddFirst(Assign(gotoTempAsg.Destination, Const(IrConsts.False)));
                }
            }
        }

        private void NewBlock(AstBlockType type, Operation branchOp, int endIndex)
        {
            IAstNode cond = GetBranchCond(type, branchOp);

            NewBlock(type, cond, endIndex);
        }

        // Opens a child block inside the current one; the enclosing state is
        // saved on the stack and restored when the end index is reached.
        private void NewBlock(AstBlockType type, IAstNode cond, int endIndex)
        {
            AstBlock parentBlock = _currBlock;
            AstBlock childBlock = new AstBlock(type, cond);

            parentBlock.Add(childBlock);

            _blockStack.Push((parentBlock, _currEndIndex, _loopEndIndex));

            _currBlock = childBlock;
            _currEndIndex = endIndex;

            if (type == AstBlockType.DoWhile)
            {
                _loopEndIndex = endIndex;
            }
        }

        private IAstNode GetBranchCond(AstBlockType type, Operation branchOp)
        {
            if (branchOp.Inst == Instruction.Branch)
            {
                // If the branch is not conditional, the condition is a constant.
                // For if it's false (always jump over, if block never executed).
                // For loops it's always true (always loop).
                return Const(type == AstBlockType.If ? IrConsts.False : IrConsts.True);
            }

            IAstNode cond = GetOperand(branchOp.GetSource(0));

            // The branch kind for which the block condition is the negated branch condition.
            Instruction invertedInst = type == AstBlockType.If
                ? Instruction.BranchIfTrue
                : Instruction.BranchIfFalse;

            if (branchOp.Inst != invertedInst)
            {
                return cond;
            }

            return new AstOperation(Instruction.LogicalNot, cond);
        }

        /// <summary>
        /// Appends a node to the block currently being built.
        /// </summary>
        public void AddNode(IAstNode node)
        {
            _currBlock.Add(node);
        }

        /// <summary>
        /// Gets a snapshot of the goto statements recorded for the current function.
        /// </summary>
        public GotoStatement[] GetGotos()
        {
            return _gotos.ToArray();
        }

        /// <summary>
        /// Creates a new local of the given type and registers it on the current function.
        /// </summary>
        public AstOperand NewTemp(AggregateType type)
        {
            AstOperand temp = Local(type);

            CurrentFunction.Locals.Add(temp);

            return temp;
        }

        /// <summary>
        /// Gets the AST operand for an IR operand. Local variables always map to the same AST
        /// operand within a function; other operands get a new AST operand on every call.
        /// </summary>
        public AstOperand GetOperand(Operand operand)
        {
            if (operand == null)
            {
                return null;
            }

            if (operand.Type == OperandType.LocalVariable)
            {
                if (_localsMap.TryGetValue(operand, out AstOperand mapped))
                {
                    return mapped;
                }

                mapped = new AstOperand(operand);

                _localsMap.Add(operand, mapped);

                CurrentFunction.Locals.Add(mapped);

                return mapped;
            }

            if (operand.Type == OperandType.ConstantBuffer)
            {
                Config.SetUsedConstantBuffer(operand.GetCbufSlot());
            }

            return new AstOperand(operand);
        }
    }
}
// src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.StructuredIr
{
    /// <summary>
    /// Buffer, offset and stride of a transform feedback output.
    /// <see cref="Valid"/> is only true for instances created through the constructor.
    /// </summary>
    readonly struct TransformFeedbackOutput
    {
        public readonly bool Valid;
        public readonly int Buffer;
        public readonly int Offset;
        public readonly int Stride;

        public TransformFeedbackOutput(int buffer, int offset, int stride)
        {
            Buffer = buffer;
            Offset = offset;
            Stride = stride;
            Valid = true;
        }
    }

    /// <summary>
    /// Result container for a structured program: its functions, the set of
    /// input/output definitions, and the mask of required helper functions.
    /// </summary>
    class StructuredProgramInfo
    {
        public List<StructuredFunction> Functions { get; } = new List<StructuredFunction>();

        public HashSet<IoDefinition> IoDefinitions { get; } = new HashSet<IoDefinition>();

        public HelperFunctionsMask HelperFunctionsMask { get; set; }
    }
}
// src/Ryujinx.Graphics.Shader/SupportBuffer.cs
using Ryujinx.Common.Memory;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Four values of the same type, stored as the fields X, Y, Z and W.
    /// </summary>
    public struct Vector4<T>
    {
        public T X;
        public T Y;
        public T Z;
        public T W;
    }

    /// <summary>
    /// Layout of the support buffer, along with the byte size and field offsets of that layout.
    /// </summary>
    public struct SupportBuffer
    {
        // Sizes and offsets are computed once by the static constructor and never
        // change afterwards, so they are exposed as read-only.
        public static readonly int FieldSize;
        public static readonly int RequiredSize;

        public static readonly int FragmentAlphaTestOffset;
        public static readonly int FragmentIsBgraOffset;
        public static readonly int ViewportInverseOffset;
        public static readonly int FragmentRenderScaleCountOffset;
        public static readonly int GraphicsRenderScaleOffset;
        public static readonly int ComputeRenderScaleOffset;

        public const int FragmentIsBgraCount = 8;
        // One for the render target, 64 for the textures, and 8 for the images.
        public const int RenderScaleMaxCount = 1 + 64 + 8;

        /// <summary>
        /// Gets the byte offset of <paramref name="target"/> relative to the start of <paramref name="storage"/>.
        /// </summary>
        private static int OffsetOf<T>(ref SupportBuffer storage, ref T target)
        {
            return (int)Unsafe.ByteOffset(ref Unsafe.As<SupportBuffer, T>(ref storage), ref target);
        }

        static SupportBuffer()
        {
            FieldSize = Unsafe.SizeOf<Vector4<float>>();
            RequiredSize = Unsafe.SizeOf<SupportBuffer>();

            // A throwaway instance is only needed to take references to its fields.
            SupportBuffer instance = new SupportBuffer();

            FragmentAlphaTestOffset = OffsetOf(ref instance, ref instance.FragmentAlphaTest);
            FragmentIsBgraOffset = OffsetOf(ref instance, ref instance.FragmentIsBgra);
            ViewportInverseOffset = OffsetOf(ref instance, ref instance.ViewportInverse);
            FragmentRenderScaleCountOffset = OffsetOf(ref instance, ref instance.FragmentRenderScaleCount);
            GraphicsRenderScaleOffset = OffsetOf(ref instance, ref instance.RenderScale);

            // The compute render scale is placed one field after the graphics render scale offset.
            ComputeRenderScaleOffset = GraphicsRenderScaleOffset + FieldSize;
        }

        // The declaration order of the instance fields below determines the offsets above.
        public Vector4<int> FragmentAlphaTest;
        public Array8<Vector4<int>> FragmentIsBgra;
        public Vector4<float> ViewportInverse;
        public Vector4<int> FragmentRenderScaleCount;

        // Render scale max count: 1 + 64 + 8. First scale is fragment output scale, others are textures/image inputs.
        public Array73<Vector4<float>> RenderScale;
    }
}
// src/Ryujinx.Graphics.Shader/TessPatchType.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Tessellation patch type.
    /// </summary>
    public enum TessPatchType
    {
        Isolines = 0,
        Triangles = 1,
        Quads = 2
    }

    static class TessPatchTypeExtensions
    {
        /// <summary>
        /// Gets the GLSL name of the patch type. Any value other than
        /// isolines or quads yields "triangles".
        /// </summary>
        public static string ToGlsl(this TessPatchType type)
        {
            switch (type)
            {
                case TessPatchType.Isolines:
                    return "isolines";

                case TessPatchType.Quads:
                    return "quads";

                default:
                    return "triangles";
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/TessSpacing.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Tessellation spacing mode.
    /// </summary>
    public enum TessSpacing
    {
        EqualSpacing = 0,
        // Spelled "Event" here, but it maps to the GLSL "fractional_even_spacing" below.
        FractionalEventSpacing = 1,
        FractionalOddSpacing = 2
    }

    static class TessSpacingExtensions
    {
        /// <summary>
        /// Gets the GLSL name of the spacing mode. Any value other than
        /// the two fractional modes yields "equal_spacing".
        /// </summary>
        public static string ToGlsl(this TessSpacing spacing)
        {
            if (spacing == TessSpacing.FractionalEventSpacing)
            {
                return "fractional_even_spacing";
            }

            if (spacing == TessSpacing.FractionalOddSpacing)
            {
                return "fractional_odd_spacing";
            }

            return "equal_spacing";
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/TextureDescriptor.cs
namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Describes a texture used by a shader: its binding, sampler type, format,
    /// constant buffer slot, handle index and usage flags.
    /// </summary>
    public struct TextureDescriptor
    {
        // New fields should be added to the end of the struct to keep disk shader cache compatibility.

        public readonly int Binding;

        public readonly SamplerType Type;
        public readonly TextureFormat Format;

        public readonly int CbufSlot;
        public readonly int HandleIndex;

        public TextureUsageFlags Flags;

        /// <summary>
        /// Creates a new texture descriptor with no usage flags set.
        /// </summary>
        /// <param name="binding">Value stored in <see cref="Binding"/></param>
        /// <param name="type">Value stored in <see cref="Type"/></param>
        /// <param name="format">Value stored in <see cref="Format"/></param>
        /// <param name="cbufSlot">Value stored in <see cref="CbufSlot"/></param>
        /// <param name="handleIndex">Value stored in <see cref="HandleIndex"/></param>
        public TextureDescriptor(int binding, SamplerType type, TextureFormat format, int cbufSlot, int handleIndex)
        {
            Binding = binding;
            Type = type;
            Format = format;
            CbufSlot = cbufSlot;
            HandleIndex = handleIndex;
            Flags = TextureUsageFlags.None;
        }

        /// <summary>
        /// Adds the given flag(s) to <see cref="Flags"/> on this instance.
        /// </summary>
        /// <param name="flag">Flag(s) to set</param>
        /// <returns>A copy of the descriptor after the flags were added</returns>
        public TextureDescriptor SetFlag(TextureUsageFlags flag)
        {
            Flags = Flags | flag;

            return this;
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/TextureFormat.cs
// File: src/Ryujinx.Graphics.Shader/TextureHandle.cs
// File: src/Ryujinx.Graphics.Shader/TextureUsageFlags.cs
// File: src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs
// File: src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Shader
{
    /// <summary>
    /// Texture formats known to the shader translator.
    /// </summary>
    public enum TextureFormat
    {
        Unknown,
        R8Unorm,
        R8Snorm,
        R8Uint,
        R8Sint,
        R16Float,
        R16Unorm,
        R16Snorm,
        R16Uint,
        R16Sint,
        R32Float,
        R32Uint,
        R32Sint,
        R8G8Unorm,
        R8G8Snorm,
        R8G8Uint,
        R8G8Sint,
        R16G16Float,
        R16G16Unorm,
        R16G16Snorm,
        R16G16Uint,
        R16G16Sint,
        R32G32Float,
        R32G32Uint,
        R32G32Sint,
        R8G8B8A8Unorm,
        R8G8B8A8Snorm,
        R8G8B8A8Uint,
        R8G8B8A8Sint,
        R16G16B16A16Float,
        R16G16B16A16Unorm,
        R16G16B16A16Snorm,
        R16G16B16A16Uint,
        R16G16B16A16Sint,
        R32G32B32A32Float,
        R32G32B32A32Uint,
        R32G32B32A32Sint,
        R10G10B10A2Unorm,
        R10G10B10A2Uint,
        R11G11B10Float
    }

    static class TextureFormatExtensions
    {
        /// <summary>
        /// Gets the GLSL format string for a texture format.
        /// </summary>
        /// <param name="format">Texture format to convert</param>
        /// <returns>The GLSL format string, or an empty string for <see cref="TextureFormat.Unknown"/> and unlisted values</returns>
        public static string ToGlslFormat(this TextureFormat format) => format switch
        {
            TextureFormat.R8Unorm => "r8",
            TextureFormat.R8Snorm => "r8_snorm",
            TextureFormat.R8Uint => "r8ui",
            TextureFormat.R8Sint => "r8i",
            TextureFormat.R16Float => "r16f",
            TextureFormat.R16Unorm => "r16",
            TextureFormat.R16Snorm => "r16_snorm",
            TextureFormat.R16Uint => "r16ui",
            TextureFormat.R16Sint => "r16i",
            TextureFormat.R32Float => "r32f",
            TextureFormat.R32Uint => "r32ui",
            TextureFormat.R32Sint => "r32i",
            TextureFormat.R8G8Unorm => "rg8",
            TextureFormat.R8G8Snorm => "rg8_snorm",
            TextureFormat.R8G8Uint => "rg8ui",
            TextureFormat.R8G8Sint => "rg8i",
            TextureFormat.R16G16Float => "rg16f",
            TextureFormat.R16G16Unorm => "rg16",
            TextureFormat.R16G16Snorm => "rg16_snorm",
            TextureFormat.R16G16Uint => "rg16ui",
            TextureFormat.R16G16Sint => "rg16i",
            TextureFormat.R32G32Float => "rg32f",
            TextureFormat.R32G32Uint => "rg32ui",
            TextureFormat.R32G32Sint => "rg32i",
            TextureFormat.R8G8B8A8Unorm => "rgba8",
            TextureFormat.R8G8B8A8Snorm => "rgba8_snorm",
            TextureFormat.R8G8B8A8Uint => "rgba8ui",
            TextureFormat.R8G8B8A8Sint => "rgba8i",
            TextureFormat.R16G16B16A16Float => "rgba16f",
            TextureFormat.R16G16B16A16Unorm => "rgba16",
            TextureFormat.R16G16B16A16Snorm => "rgba16_snorm",
            TextureFormat.R16G16B16A16Uint => "rgba16ui",
            TextureFormat.R16G16B16A16Sint => "rgba16i",
            TextureFormat.R32G32B32A32Float => "rgba32f",
            TextureFormat.R32G32B32A32Uint => "rgba32ui",
            TextureFormat.R32G32B32A32Sint => "rgba32i",
            TextureFormat.R10G10B10A2Unorm => "rgb10_a2",
            TextureFormat.R10G10B10A2Uint => "rgb10_a2ui",
            TextureFormat.R11G11B10Float => "r11f_g11f_b10f",
            _ => string.Empty
        };

        /// <summary>
        /// Gets the component type of a texture format.
        /// </summary>
        /// <param name="format">Texture format to query</param>
        /// <returns>
        /// <see cref="AggregateType.U32"/> for the Uint formats, <see cref="AggregateType.S32"/> for the
        /// Sint formats, and <see cref="AggregateType.FP32"/> for everything else
        /// </returns>
        public static AggregateType GetComponentType(this TextureFormat format) => format switch
        {
            TextureFormat.R8Uint => AggregateType.U32,
            TextureFormat.R16Uint => AggregateType.U32,
            TextureFormat.R32Uint => AggregateType.U32,
            TextureFormat.R8G8Uint => AggregateType.U32,
            TextureFormat.R16G16Uint => AggregateType.U32,
            TextureFormat.R32G32Uint => AggregateType.U32,
            TextureFormat.R8G8B8A8Uint => AggregateType.U32,
            TextureFormat.R16G16B16A16Uint => AggregateType.U32,
            TextureFormat.R32G32B32A32Uint => AggregateType.U32,
            TextureFormat.R10G10B10A2Uint => AggregateType.U32,

            TextureFormat.R8Sint => AggregateType.S32,
            TextureFormat.R16Sint => AggregateType.S32,
            TextureFormat.R32Sint => AggregateType.S32,
            TextureFormat.R8G8Sint => AggregateType.S32,
            TextureFormat.R16G16Sint => AggregateType.S32,
            TextureFormat.R32G32Sint => AggregateType.S32,
            TextureFormat.R8G8B8A8Sint => AggregateType.S32,
            TextureFormat.R16G16B16A16Sint => AggregateType.S32,
            TextureFormat.R32G32B32A32Sint => AggregateType.S32,

            _ => AggregateType.FP32
        };
    }

    /// <summary>
    /// Kind of handle stored in the upper bits of a packed offsets value
    /// (see <see cref="TextureHandle.PackOffsets"/>).
    /// </summary>
    public enum TextureHandleType
    {
        CombinedSampler = 0, // Must be 0.
        SeparateSamplerHandle = 1,
        SeparateSamplerId = 2,
        SeparateConstantSamplerHandle = 3
    }

    /// <summary>
    /// Helpers to pack and unpack texture handles, and the constant buffer slots and offsets they come from.
    /// </summary>
    public static class TextureHandle
    {
        /// <summary>
        /// Packs two constant buffer slots into a single value.
        /// The first slot is stored as is, the second one is stored plus one, shifted left by 16 bits.
        /// </summary>
        /// <param name="cbufSlot0">First constant buffer slot</param>
        /// <param name="cbufSlot1">Second constant buffer slot</param>
        /// <returns>The packed slots</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int PackSlots(int cbufSlot0, int cbufSlot1)
        {
            int biasedSlot1 = cbufSlot1 + 1;

            return cbufSlot0 | (biasedSlot1 << 16);
        }

        /// <summary>
        /// Unpacks the texture and sampler buffer indices from a packed slots value.
        /// </summary>
        /// <param name="slots">Packed slots; a negative value selects the default index for both buffers</param>
        /// <param name="defaultTextureBufferIndex">Index returned for both buffers when <paramref name="slots"/> is negative</param>
        /// <returns>The texture buffer index and the sampler buffer index</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static (int, int) UnpackSlots(int slots, int defaultTextureBufferIndex)
        {
            if (slots < 0)
            {
                return (defaultTextureBufferIndex, defaultTextureBufferIndex);
            }

            int textureBufferIndex = (ushort)slots;
            uint biasedSamplerIndex = (uint)slots >> 16;

            // A zero on the upper half means that the sampler shares the texture buffer.
            if (biasedSamplerIndex == 0)
            {
                return (textureBufferIndex, textureBufferIndex);
            }

            return (textureBufferIndex, (int)biasedSamplerIndex - 1);
        }

        /// <summary>
        /// Packs two constant buffer offsets and a handle type into a single value.
        /// The second offset is shifted left by 14 bits, and the type by 28 bits.
        /// </summary>
        /// <param name="cbufOffset0">First offset</param>
        /// <param name="cbufOffset1">Second offset</param>
        /// <param name="type">Handle type</param>
        /// <returns>The packed offsets and type</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int PackOffsets(int cbufOffset0, int cbufOffset1, TextureHandleType type)
        {
            int typeBits = (int)type << 28;
            int offset1Bits = cbufOffset1 << 14;

            return cbufOffset0 | offset1Bits | typeBits;
        }

        /// <summary>
        /// Unpacks a value produced by <see cref="PackOffsets"/>.
        /// Each offset is masked to 14 bits, and the type is taken from the 4 uppermost bits.
        /// </summary>
        /// <param name="handle">Packed offsets and type</param>
        /// <returns>The first offset, the second offset, and the handle type</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static (int, int, TextureHandleType) UnpackOffsets(int handle)
        {
            int offset0 = handle & 0x3fff;
            int offset1 = (handle >> 14) & 0x3fff;
            TextureHandleType type = (TextureHandleType)((uint)handle >> 28);

            return (offset0, offset1, type);
        }

        /// <summary>
        /// Unpacks the texture ID from the real texture handle.
        /// </summary>
        /// <param name="packedId">The real texture handle</param>
        /// <returns>The texture ID (lower 20 bits of the handle)</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int UnpackTextureId(int packedId)
        {
            return packedId & 0xfffff;
        }

        /// <summary>
        /// Unpacks the sampler ID from the real texture handle.
        /// </summary>
        /// <param name="packedId">The real texture handle</param>
        /// <returns>The sampler ID (12 bits starting at bit 20 of the handle)</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int UnpackSamplerId(int packedId)
        {
            return (packedId >> 20) & 0xfff;
        }

        /// <summary>
        /// Reads a packed texture and sampler ID (basically, the real texture handle)
        /// from a given texture/sampler constant buffer.
        /// </summary>
        /// <param name="wordOffset">Packed offsets and handle type, in the format produced by <see cref="PackOffsets"/></param>
        /// <param name="cachedTextureBuffer">The constant buffer to fetch texture IDs from</param>
        /// <param name="cachedSamplerBuffer">The constant buffer to fetch sampler IDs from</param>
        /// <returns>The packed texture and sampler ID (the real texture handle)</returns>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int ReadPackedId(int wordOffset, ReadOnlySpan<int> cachedTextureBuffer, ReadOnlySpan<int> cachedSamplerBuffer)
        {
            (int textureWordOffset, int samplerWordOffset, TextureHandleType handleType) = UnpackOffsets(wordOffset);

            // An empty buffer yields a handle of zero rather than a read.
            int handle = cachedTextureBuffer.Length != 0 ? cachedTextureBuffer[textureWordOffset] : 0;

            // A combined handle needs only the texture word.
            if (handleType == TextureHandleType.CombinedSampler)
            {
                return handle;
            }

            // For the other types, the second packed offset describes the sampler:
            // for a constant sampler handle it is the sampler value itself,
            // otherwise it is a word offset into the sampler buffer.
            bool isConstantSampler = handleType == TextureHandleType.SeparateConstantSamplerHandle;

            int samplerHandle;

            if (isConstantSampler)
            {
                samplerHandle = samplerWordOffset;
            }
            else
            {
                samplerHandle = cachedSamplerBuffer.Length != 0 ? cachedSamplerBuffer[samplerWordOffset] : 0;
            }

            // Sampler IDs and constant sampler handles are moved to the sampler ID bits of the handle.
            if (isConstantSampler || handleType == TextureHandleType.SeparateSamplerId)
            {
                samplerHandle <<= 20;
            }

            return handle | samplerHandle;
        }
    }

    /// <summary>
    /// Flags that indicate how a texture will be used in a shader.
    /// </summary>
    [Flags]
    public enum TextureUsageFlags
    {
        None = 0,

        // Integer sampled textures must be noted for resolution scaling.
        ResScaleUnsupported = 1 << 0,
        NeedsScaleValue = 1 << 1,
        ImageStore = 1 << 2,
        ImageCoherent = 1 << 3
    }
}

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Type of a value: an element type on the lower 8 bits,
    /// combined with an element count and an array flag on the bits above.
    /// </summary>
    enum AggregateType
    {
        // Element types.
        Invalid,
        Void,
        Bool,
        FP32,
        FP64,
        S32,
        U32,

        ElementTypeMask = 0xff,

        // Element count, stored as two bits starting at ElementCountShift.
        ElementCountShift = 8,
        ElementCountMask = 3 << ElementCountShift,

        Scalar = 0 << ElementCountShift,
        Vector2 = 1 << ElementCountShift,
        Vector3 = 2 << ElementCountShift,
        Vector4 = 3 << ElementCountShift,

        Array = 1 << 10
    }

    /// <summary>
    /// Attribute offset constants.
    /// </summary>
    static class AttributeConsts
    {
        public const int PrimitiveId = 0x060;
        public const int Layer = 0x064;
        public const int PositionX = 0x070;
        public const int PositionY = 0x074;
        public const int FrontColorDiffuseR = 0x280;
        public const int BackColorDiffuseR = 0x2a0;
        public const int ClipDistance0 = 0x2c0;
        public const int ClipDistance1 = 0x2c4;
        public const int ClipDistance2 = 0x2c8;
        public const int ClipDistance3 = 0x2cc;
        public const int ClipDistance4 = 0x2d0;
        public const int ClipDistance5 = 0x2d4;
        public const int ClipDistance6 = 0x2d8;
        public const int ClipDistance7 = 0x2dc;
        public const int FogCoord = 0x2e8;
        public const int TessCoordX = 0x2f0;
        public const int TessCoordY = 0x2f4;
        public const int InstanceId = 0x2f8;
        public const int VertexId = 0x2fc;
        public const int TexCoordCount = 10;
        public const int TexCoordBase = 0x300;
        public const int TexCoordEnd = TexCoordBase + TexCoordCount * 16;
        public const int FrontFacing = 0x3fc;

        public const int UserAttributesCount = 32;
        public const int UserAttributeBase = 0x80;
        public const int UserAttributeEnd = UserAttributeBase + UserAttributesCount * 16;

        public const int UserAttributePerPatchBase = 0x18;
        public const int UserAttributePerPatchEnd = 0x200;
    }
}
// File: src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Basic blocks of a function, together with a post-order traversal of them.
    /// </summary>
    class ControlFlowGraph
    {
        public BasicBlock[] Blocks { get; }
        public BasicBlock[] PostOrderBlocks { get; }
        public int[] PostOrderMap { get; }

        /// <summary>
        /// Creates a control flow graph from its blocks, and computes the post-order
        /// traversal that starts at the first block.
        /// </summary>
        /// <param name="blocks">Blocks of the graph; the first one is the traversal root</param>
        public ControlFlowGraph(BasicBlock[] blocks)
        {
            Blocks = blocks;
            PostOrderMap = new int[blocks.Length];

            var seen = new HashSet<BasicBlock>();
            var pending = new Stack<BasicBlock>();
            var postOrder = new List<BasicBlock>(blocks.Length);

            BasicBlock root = blocks[0];

            seen.Add(root);
            pending.Push(root);

            while (pending.TryPop(out BasicBlock block))
            {
                // Pick the first successor that was not visited yet, Next before Branch.
                BasicBlock successor = null;

                if (block.Next != null && seen.Add(block.Next))
                {
                    successor = block.Next;
                }
                else if (block.Branch != null && seen.Add(block.Branch))
                {
                    successor = block.Branch;
                }

                if (successor != null)
                {
                    // The block goes back on the stack, to be looked at again after the successor.
                    pending.Push(block);
                    pending.Push(successor);

                    continue;
                }

                // No successor left to visit, the block can be numbered.
                PostOrderMap[block.Index] = postOrder.Count;

                postOrder.Add(block);
            }

            PostOrderBlocks = postOrder.ToArray();
        }

        /// <summary>
        /// Builds a control flow graph from a linear list of operations.
        /// Blocks start at labels and after branch instructions; blocks without predecessors
        /// (other than the first block) are removed.
        /// </summary>
        /// <param name="operations">Operations to split into blocks</param>
        /// <returns>The control flow graph</returns>
        public static ControlFlowGraph Create(Operation[] operations)
        {
            var labelBlocks = new Dictionary<Operand, BasicBlock>();
            var blocks = new List<BasicBlock>();

            BasicBlock current = null;

            // Makes "successor" the current block, linking it as the fall-through of the previous
            // block unless that one ends with an unconditional instruction.
            void MoveTo(BasicBlock successor)
            {
                if (current != null && !EndsWithUnconditionalInst(current.GetLastOp()))
                {
                    current.Next = successor;
                }

                current = successor;
            }

            // Appends a brand new block to the list and makes it the current block.
            void StartFreshBlock()
            {
                BasicBlock fresh = new BasicBlock(blocks.Count);

                blocks.Add(fresh);

                MoveTo(fresh);
            }

            bool startNewBlock = true;

            foreach (Operation operation in operations)
            {
                Instruction inst = operation.Inst;

                if (inst == Instruction.MarkLabel)
                {
                    Operand label = operation.Dest;

                    if (labelBlocks.TryGetValue(label, out BasicBlock existing))
                    {
                        // The block was already created by an earlier branch to this label,
                        // it only gets its position now.
                        existing.Index = blocks.Count;

                        blocks.Add(existing);

                        MoveTo(existing);
                    }
                    else
                    {
                        StartFreshBlock();

                        labelBlocks.Add(label, current);
                    }
                }
                else
                {
                    if (startNewBlock)
                    {
                        StartFreshBlock();
                    }

                    current.Operations.AddLast(operation);
                }

                startNewBlock = inst == Instruction.Branch ||
                                inst == Instruction.BranchIfTrue ||
                                inst == Instruction.BranchIfFalse;

                if (startNewBlock)
                {
                    Operand target = operation.Dest;

                    if (!labelBlocks.TryGetValue(target, out BasicBlock targetBlock))
                    {
                        // The label was not marked yet, create the block ahead of time.
                        targetBlock = new BasicBlock();

                        labelBlocks.Add(target, targetBlock);
                    }

                    current.Branch = targetBlock;
                }
            }

            // Remove unreachable blocks.
            // Unlinking a block is followed by another pass, which repeats until nothing is removed.
            bool removedAny;

            do
            {
                removedAny = false;

                int position = 1;

                while (position < blocks.Count)
                {
                    BasicBlock candidate = blocks[position];

                    if (candidate.Predecessors.Count == 0)
                    {
                        candidate.Next = null;
                        candidate.Branch = null;
                        blocks.RemoveAt(position);
                        removedAny = true;
                    }
                    else
                    {
                        candidate.Index = position;
                        position++;
                    }
                }
            }
            while (removedAny);

            return new ControlFlowGraph(blocks.ToArray());
        }

        /// <summary>
        /// Checks if a node is a branch, discard or return operation.
        /// </summary>
        /// <param name="node">Node to check</param>
        /// <returns>True if the node is one of those operations, false otherwise</returns>
        private static bool EndsWithUnconditionalInst(INode node)
        {
            if (!(node is Operation operation))
            {
                return false;
            }

            Instruction inst = operation.Inst;

            return inst == Instruction.Branch ||
                   inst == Instruction.Discard ||
                   inst == Instruction.Return;
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Translation/Dominance.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;

namespace Ryujinx.Graphics.Shader.Translation
{
    static class Dominance
    {
        // These methods implement the algorithms from "A Simple, Fast Dominance Algorithm".
        // https://www.cs.rice.edu/~keith/EMBED/dom.pdf

        /// <summary>
        /// Computes the immediate dominator of every block of the graph.
        /// The first block is set as its own immediate dominator.
        /// </summary>
        /// <param name="cfg">Control flow graph with the blocks to process</param>
        public static void FindDominators(ControlFlowGraph cfg)
        {
            int[] postOrderMap = cfg.PostOrderMap;
            BasicBlock[] postOrderBlocks = cfg.PostOrderBlocks;

            // Walks both blocks up through their immediate dominators until they meet,
            // always advancing the one with the lower post-order number.
            BasicBlock Intersect(BasicBlock first, BasicBlock second)
            {
                while (first != second)
                {
                    while (postOrderMap[first.Index] < postOrderMap[second.Index])
                    {
                        first = first.ImmediateDominator;
                    }

                    while (postOrderMap[second.Index] < postOrderMap[first.Index])
                    {
                        second = second.ImmediateDominator;
                    }
                }

                return first;
            }

            BasicBlock entry = cfg.Blocks[0];

            entry.ImmediateDominator = entry;

            bool changed;

            do
            {
                changed = false;

                // Reverse post-order, skipping the last post-order entry.
                for (int index = postOrderBlocks.Length - 2; index >= 0; index--)
                {
                    BasicBlock block = postOrderBlocks[index];

                    BasicBlock candidate = null;

                    for (int predIndex = 0; predIndex < block.Predecessors.Count; predIndex++)
                    {
                        BasicBlock predecessor = block.Predecessors[predIndex];

                        // Predecessors that were not processed yet can't contribute.
                        if (predecessor.ImmediateDominator == null)
                        {
                            continue;
                        }

                        candidate = candidate == null ? predecessor : Intersect(predecessor, candidate);
                    }

                    if (block.ImmediateDominator != candidate)
                    {
                        block.ImmediateDominator = candidate;

                        changed = true;
                    }
                }
            }
            while (changed);
        }

        /// <summary>
        /// Computes the dominance frontiers of the blocks.
        /// This reads the immediate dominators, so <see cref="FindDominators"/> is expected to have run before.
        /// </summary>
        /// <param name="blocks">Blocks to process</param>
        public static void FindDominanceFrontiers(BasicBlock[] blocks)
        {
            foreach (BasicBlock block in blocks)
            {
                // Only blocks with two or more predecessors are added to a frontier.
                if (block.Predecessors.Count < 2)
                {
                    continue;
                }

                foreach (BasicBlock predecessor in block.Predecessors)
                {
                    // Every block on the way up, from the predecessor to the immediate dominator
                    // of the block (not included), has the block on its frontier.
                    for (BasicBlock runner = predecessor;
                         runner != block.ImmediateDominator;
                         runner = runner.ImmediateDominator)
                    {
                        runner.DominanceFrontiers.Add(block);
                    }
                }
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Collects the operations emitted while translating a shader program,
    /// and emits the extra code needed at the start and at the end of the shader.
    /// </summary>
    class EmitterContext
    {
        /// <summary>
        /// Selector compare attached to a block label, emitted when the block is entered.
        /// </summary>
        private readonly struct BrxTarget
        {
            public readonly Operand Selector;
            public readonly int ExpectedValue;
            public readonly ulong NextTargetAddress;

            public BrxTarget(Operand selector, int expectedValue, ulong nextTargetAddress)
            {
                Selector = selector;
                ExpectedValue = expectedValue;
                NextTargetAddress = nextTargetAddress;
            }
        }

        /// <summary>
        /// Label operand of a block, with its optional <see cref="BrxTarget"/>.
        /// </summary>
        private class BlockLabel
        {
            public readonly Operand Label;
            public BrxTarget BrxTarget;

            public BlockLabel(Operand label)
            {
                Label = label;
            }
        }

        private readonly List<Operation> _operations;
        private readonly Dictionary<ulong, BlockLabel> _labels;

        public DecodedProgram Program { get; }
        public ShaderConfig Config { get; }

        public bool IsNonMain { get; }

        public Block CurrBlock { get; set; }
        public InstOp CurrOp { get; set; }

        public int OperationsCount => _operations.Count;

        /// <summary>
        /// Creates a new emitter context, and emits the shader start code.
        /// </summary>
        /// <param name="program">Decoded program being translated</param>
        /// <param name="config">Shader configuration</param>
        /// <param name="isNonMain">Value of <see cref="IsNonMain"/>; when true, <see cref="PrepareForReturn"/> emits nothing</param>
        public EmitterContext(DecodedProgram program, ShaderConfig config, bool isNonMain)
        {
            Program = program;
            Config = config;
            IsNonMain = isNonMain;
            _operations = new List<Operation>();
            _labels = new Dictionary<ulong, BlockLabel>();

            EmitStart();
        }

        /// <summary>
        /// Emits the code that goes at the very start of the shader.
        /// </summary>
        private void EmitStart()
        {
            if (Config.Stage != ShaderStage.Vertex ||
                Config.Options.TargetApi != TargetApi.Vulkan ||
                (Config.Options.Flags & TranslationFlags.VertexA) != 0)
            {
                return;
            }

            // Vulkan requires the point size to be always written on the shader if the primitive topology is points.
            this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(Config.GpuAccessor.QueryPointSize()));
        }

        /// <summary>
        /// Reinterprets the raw opcode of the current instruction as <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Type to reinterpret the opcode as; asserted to have the size of a ulong</typeparam>
        /// <returns>The raw opcode, viewed as <typeparamref name="T"/></returns>
        public T GetOp<T>() where T : unmanaged
        {
            Debug.Assert(Unsafe.SizeOf<T>() == sizeof(ulong));
            ulong op = CurrOp.RawOpCode;
            return Unsafe.As<ulong, T>(ref op);
        }

        /// <summary>
        /// Appends a new operation to the list.
        /// </summary>
        /// <returns>The destination operand that was passed in</returns>
        public Operand Add(Instruction inst, Operand dest = null, params Operand[] sources)
        {
            _operations.Add(new Operation(inst, dest, sources));

            return dest;
        }

        /// <summary>
        /// Appends a new operation with a storage kind to the list.
        /// </summary>
        /// <returns>The destination operand that was passed in</returns>
        public Operand Add(Instruction inst, StorageKind storageKind, Operand dest = null, params Operand[] sources)
        {
            _operations.Add(new Operation(inst, storageKind, dest, sources));

            return dest;
        }

        /// <summary>
        /// Appends a new operation with two destinations to the list.
        /// </summary>
        /// <returns>The destination pair that was passed in</returns>
        public (Operand, Operand) Add(Instruction inst, (Operand, Operand) dest, params Operand[] sources)
        {
            Operand[] dests = new[] { dest.Item1, dest.Item2 };

            Add(new Operation(inst, 0, dests, sources));

            return dest;
        }

        /// <summary>
        /// Appends an existing operation to the list.
        /// </summary>
        public void Add(Operation operation)
        {
            _operations.Add(operation);
        }

        /// <summary>
        /// Creates a texture operation with an unknown format.
        /// </summary>
        public TextureOperation CreateTextureOperation(
            Instruction inst,
            SamplerType type,
            TextureFlags flags,
            int handle,
            int compIndex,
            Operand[] dests,
            params Operand[] sources)
        {
            return CreateTextureOperation(inst, type, TextureFormat.Unknown, flags, handle, compIndex, dests, sources);
        }

        /// <summary>
        /// Creates a texture operation. Unless the bindless flag is set, the texture is also
        /// registered as used on the shader configuration. The operation is not added to the list.
        /// </summary>
        public TextureOperation CreateTextureOperation(
            Instruction inst,
            SamplerType type,
            TextureFormat format,
            TextureFlags flags,
            int handle,
            int compIndex,
            Operand[] dests,
            params Operand[] sources)
        {
            bool isBindless = flags.HasFlag(TextureFlags.Bindless);

            if (!isBindless)
            {
                Config.SetUsedTexture(inst, type, format, flags, TextureOperation.DefaultCbufSlot, handle);
            }

            return new TextureOperation(inst, type, format, flags, handle, compIndex, dests, sources);
        }

        /// <summary>
        /// Flags the features used by reading the given attribute.
        /// </summary>
        /// <param name="attribute">Attribute offset, see <see cref="AttributeConsts"/></param>
        public void FlagAttributeRead(int attribute)
        {
            if (Config.Stage == ShaderStage.Vertex && attribute == AttributeConsts.InstanceId)
            {
                Config.SetUsedFeature(FeatureFlags.InstanceId);
            }
            else if (Config.Stage == ShaderStage.Fragment)
            {
                if (attribute == AttributeConsts.PositionX || attribute == AttributeConsts.PositionY)
                {
                    Config.SetUsedFeature(FeatureFlags.FragCoordXY);
                }
            }
        }

        /// <summary>
        /// Flags the features used by writing the given attribute.
        /// </summary>
        /// <param name="attribute">Attribute offset, see <see cref="AttributeConsts"/></param>
        public void FlagAttributeWritten(int attribute)
        {
            if (Config.Stage == ShaderStage.Vertex)
            {
                // The 8 clip distances are 4 bytes apart; only exact clip distance offsets are accepted.
                int clipOffset = attribute - AttributeConsts.ClipDistance0;

                bool isClipDistance =
                    attribute >= AttributeConsts.ClipDistance0 &&
                    attribute <= AttributeConsts.ClipDistance7 &&
                    (clipOffset & 3) == 0;

                if (isClipDistance)
                {
                    Config.SetClipDistanceWritten(clipOffset / 4);
                }
            }

            if (Config.Stage != ShaderStage.Fragment && attribute == AttributeConsts.Layer)
            {
                Config.SetUsedFeature(FeatureFlags.RtLayer);
            }
        }

        /// <summary>
        /// Emits a label marker for the given label operand.
        /// </summary>
        public void MarkLabel(Operand label)
        {
            Add(Instruction.MarkLabel, label);
        }

        /// <summary>
        /// Gets the label operand for an address, creating it on first use.
        /// </summary>
        public Operand GetLabel(ulong address)
        {
            return EnsureBlockLabel(address).Label;
        }

        /// <summary>
        /// Attaches a selector compare to the block at the given address.
        /// The block is asserted to not have one already.
        /// </summary>
        public void SetBrxTarget(ulong address, Operand selector, int targetValue, ulong nextTargetAddress)
        {
            BlockLabel blockLabel = EnsureBlockLabel(address);
            Debug.Assert(blockLabel.BrxTarget.Selector == null);
            blockLabel.BrxTarget = new BrxTarget(selector, targetValue, nextTargetAddress);
        }

        /// <summary>
        /// Marks the label of the block at the given address. If the block has a selector compare
        /// attached, also emits a branch to the next target taken when the selector does not match.
        /// </summary>
        public void EnterBlock(ulong address)
        {
            BlockLabel blockLabel = EnsureBlockLabel(address);

            MarkLabel(blockLabel.Label);

            BrxTarget target = blockLabel.BrxTarget;

            if (target.Selector == null)
            {
                return;
            }

            this.BranchIfFalse(GetLabel(target.NextTargetAddress), this.ICompareEqual(target.Selector, Const(target.ExpectedValue)));
        }

        /// <summary>
        /// Gets the block label for an address, creating and registering it if needed.
        /// </summary>
        private BlockLabel EnsureBlockLabel(ulong address)
        {
            if (_labels.TryGetValue(address, out BlockLabel existing))
            {
                return existing;
            }

            BlockLabel created = new BlockLabel(Label());

            _labels.Add(address, created);

            return created;
        }

        /// <summary>
        /// Emits the position and layer output adjustments done before a vertex pipeline shader returns.
        /// </summary>
        public void PrepareForVertexReturn()
        {
            if (Config.GpuAccessor.QueryViewportTransformDisable())
            {
                Operand x = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));
                Operand y = this.Load(StorageKind.Output, IoVariable.Position, null, Const(1));
                Operand xScale = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(0));
                Operand yScale = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(1));
                Operand negativeOne = ConstF(-1.0f);

                // position.xy = position.xy * viewInverse.xy - 1
                this.Store(StorageKind.Output, IoVariable.Position, null, Const(0), this.FPFusedMultiplyAdd(x, xScale, negativeOne));
                this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), this.FPFusedMultiplyAdd(y, yScale, negativeOne));
            }

            if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne())
            {
                Operand z = this.Load(StorageKind.Output, IoVariable.Position, null, Const(2));
                Operand w = this.Load(StorageKind.Output, IoVariable.Position, null, Const(3));
                Operand halfW = this.FPMultiply(w, ConstF(0.5f));

                // position.z = position.z * 0.5 + position.w * 0.5
                this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(z, ConstF(0.5f), halfW));
            }

            if (Config.Stage != ShaderStage.Geometry && Config.HasLayerInputAttribute)
            {
                Config.SetUsedFeature(FeatureFlags.RtLayer);

                // The attribute is split into a vector index and a component index.
                int layerAttribute = Config.GpLayerInputAttribute;
                int attrVecIndex = layerAttribute >> 2;
                int attrComponentIndex = layerAttribute & 3;

                Operand layer = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(attrVecIndex), Const(attrComponentIndex));

                this.Store(StorageKind.Output, IoVariable.Layer, null, layer);
            }
        }

        /// <summary>
        /// Same as <see cref="PrepareForVertexReturn()"/>, but first copies the position components
        /// that are going to be modified into new locals.
        /// </summary>
        /// <param name="oldXLocal">Local with the copy of position X, or null if X is not modified</param>
        /// <param name="oldYLocal">Local with the copy of position Y, or null if Y is not modified</param>
        /// <param name="oldZLocal">Local with the copy of position Z, or null if Z is not modified</param>
        public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal)
        {
            oldXLocal = null;
            oldYLocal = null;
            oldZLocal = null;

            if (Config.GpuAccessor.QueryViewportTransformDisable())
            {
                oldXLocal = Local();
                this.Copy(oldXLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(0)));
                oldYLocal = Local();
                this.Copy(oldYLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(1)));
            }

            if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne())
            {
                oldZLocal = Local();
                this.Copy(oldZLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(2)));
            }

            PrepareForVertexReturn();
        }

        /// <summary>
        /// Emits the code that must run before the shader returns, depending on the shader stage.
        /// Nothing is emitted when <see cref="IsNonMain"/> is set.
        /// </summary>
        public void PrepareForReturn()
        {
            if (IsNonMain)
            {
                return;
            }

            if (Config.LastInVertexPipeline &&
                (Config.Stage == ShaderStage.Vertex || Config.Stage == ShaderStage.TessellationEvaluation) &&
                (Config.Options.Flags & TranslationFlags.VertexA) == 0)
            {
                PrepareForVertexReturn();
            }
            else if (Config.Stage == ShaderStage.Geometry)
            {
                PrepareForGeometryReturn();
            }
            else if (Config.Stage == ShaderStage.Fragment)
            {
                PrepareForFragmentReturn();
            }
        }

        /// <summary>
        /// Emits a copy of the input primitive to the outputs, for passthrough geometry shaders
        /// when the host does not support geometry shader passthrough.
        /// </summary>
        private void PrepareForGeometryReturn()
        {
            if (!Config.GpPassthrough || Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
            {
                return;
            }

            int inputVertices = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices();

            for (int primIndex = 0; primIndex < inputVertices; primIndex++)
            {
                CopyPositionInputToOutput(primIndex);

                // Go through the set bits of the mask, from the lowest to the highest.
                int remaining = Config.PassthroughAttributes;

                while (remaining != 0)
                {
                    int index = BitOperations.TrailingZeroCount(remaining);
                    CopyUserDefinedInputToOutput(index, primIndex);
                    Config.SetOutputUserAttribute(index);
                    remaining &= ~(1 << index);
                }

                this.EmitVertex();
            }

            this.EndPrimitive();
        }

        /// <summary>
        /// Loads the 4 position components of an input vertex, then stores them on the output position.
        /// </summary>
        private void CopyPositionInputToOutput(int primIndex)
        {
            Operand[] components = new Operand[4];

            for (int component = 0; component < components.Length; component++)
            {
                components[component] = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(component));
            }

            for (int component = 0; component < components.Length; component++)
            {
                this.Store(StorageKind.Output, IoVariable.Position, null, Const(component), components[component]);
            }
        }

        /// <summary>
        /// Loads the 4 components of a user defined input of a vertex, then stores them on the
        /// user defined output with the same index.
        /// </summary>
        private void CopyUserDefinedInputToOutput(int index, int primIndex)
        {
            Operand[] components = new Operand[4];

            for (int component = 0; component < components.Length; component++)
            {
                components[component] = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(component));
            }

            for (int component = 0; component < components.Length; component++)
            {
                this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(component), components[component]);
            }
        }

        /// <summary>
        /// Emits the alpha to coverage dither discard, the depth output, the alpha test
        /// and the color outputs of a fragment shader, in that order.
        /// </summary>
        private void PrepareForFragmentReturn()
        {
            GenerateAlphaToCoverageDitherDiscard();

            bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat();

            if (Config.OmapDepth)
            {
                Operand depth = Register(Config.GetDepthRegister(), RegisterType.Gpr);

                this.Store(StorageKind.Output, IoVariable.FragmentOutputDepth, null, depth);
            }

            GenerateAlphaTest();
            WriteFragmentColorOutputs(supportsBgra);
        }

        /// <summary>
        /// Emits the alpha test, which discards the fragment when the comparison of
        /// register 3 against the reference value fails.
        /// </summary>
        private void GenerateAlphaTest()
        {
            AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare();

            // Nothing to test if the test always passes, or if bit 3 of the target mask is not set.
            if (alphaTestOp == AlphaTestOp.Always || (Config.OmapTargets & 8) == 0)
            {
                return;
            }

            if (alphaTestOp == AlphaTestOp.Never)
            {
                this.Discard();

                return;
            }

            Instruction comparator = alphaTestOp switch
            {
                AlphaTestOp.Equal => Instruction.CompareEqual,
                AlphaTestOp.Greater => Instruction.CompareGreater,
                AlphaTestOp.GreaterOrEqual => Instruction.CompareGreaterOrEqual,
                AlphaTestOp.Less => Instruction.CompareLess,
                AlphaTestOp.LessOrEqual => Instruction.CompareLessOrEqual,
                AlphaTestOp.NotEqual => Instruction.CompareNotEqual,
                _ => 0
            };

            Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\".");

            Operand alpha = Register(3, RegisterType.Gpr);
            Operand alphaRef = ConstF(Config.GpuAccessor.QueryAlphaTestReference());
            Operand alphaPass = Add(Instruction.FP32 | comparator, Local(), alpha, alphaRef);
            Operand alphaPassLabel = Label();

            // Skip the discard when the test passes.
            this.BranchIfTrue(alphaPassLabel, alphaPass);
            this.Discard();
            this.MarkLabel(alphaPassLabel);
        }

        /// <summary>
        /// Stores the enabled color components of the 8 render targets from the registers.
        /// </summary>
        /// <param name="supportsBgra">When false, the B and R components are swapped at runtime for targets flagged as BGRA</param>
        private void WriteFragmentColorOutputs(bool supportsBgra)
        {
            int regIndexBase = 0;

            for (int rtIndex = 0; rtIndex < 8; rtIndex++)
            {
                for (int component = 0; component < 4; component++)
                {
                    int componentBit = 1 << (rtIndex * 4 + component);

                    if ((Config.OmapTargets & componentBit) == 0)
                    {
                        continue;
                    }

                    Operand src = Register(regIndexBase + component, RegisterType.Gpr);

                    bool mayNeedSwap = !supportsBgra && (component == 0 || component == 2);

                    if (!mayNeedSwap)
                    {
                        this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(component), src);

                        continue;
                    }

                    // Perform B <-> R swap if needed, for BGRA formats (not supported on OpenGL).
                    Operand isBgra = this.Load(StorageKind.Input, IoVariable.FragmentOutputIsBgra, null, Const(rtIndex));

                    Operand lblIsBgra = Label();
                    Operand lblEnd = Label();

                    this.BranchIfTrue(lblIsBgra, isBgra);

                    this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(component), src);
                    this.Branch(lblEnd);

                    MarkLabel(lblIsBgra);

                    this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(2 - component), src);

                    MarkLabel(lblEnd);
                }

                // Targets with any component enabled take 4 registers.
                int targetBits = 0xf << (rtIndex * 4);

                if ((Config.OmapTargets & targetBits) != 0)
                {
                    Config.SetOutputUserAttribute(rtIndex);
                    regIndexBase += 4;
                }
            }
        }

        /// <summary>
        /// Emits a discard that depends on the fragment alpha and on the lowest bit of the
        /// fragment X and Y coordinates, when alpha to coverage dithering is enabled.
        /// </summary>
        private void GenerateAlphaToCoverageDitherDiscard()
        {
            // If the feature is disabled, or alpha is not written, then we're done.
            if (!Config.GpuAccessor.QueryAlphaToCoverageDitherEnable() || (Config.OmapTargets & 8) == 0)
            {
                return;
            }

            // 11 11 11 10 10 10 10 00
            // 11 01 01 01 01 00 00 00
            Operand ditherMask = Const(unchecked((int)0xfbb99110u));

            Operand fragCoordX = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(0));
            Operand fragCoordY = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(1));

            // 2-bit position inside of a 2x2 pixel block.
            Operand x = this.BitwiseAnd(this.FP32ConvertToU32(fragCoordX), Const(1));
            Operand y = this.BitwiseAnd(this.FP32ConvertToU32(fragCoordY), Const(1));
            Operand xy = this.BitwiseOr(x, this.ShiftLeft(y, Const(1)));

            // Alpha quantized to 8 levels selects a group of 4 bits on the mask,
            // and the position selects the bit inside of that group.
            Operand alpha = Register(3, RegisterType.Gpr);
            Operand scaledAlpha = this.FPMultiply(this.FPSaturate(alpha), ConstF(8));
            Operand quantizedAlpha = this.IMinimumU32(this.FP32ConvertToU32(scaledAlpha), Const(7));
            Operand shift = this.BitwiseOr(this.ShiftLeft(quantizedAlpha, Const(2)), xy);
            Operand opaque = this.BitwiseAnd(this.ShiftRightU32(ditherMask, shift), Const(1));

            Operand a2cDitherEndLabel = Label();

            this.BranchIfTrue(a2cDitherEndLabel, opaque);
            this.Discard();
            this.MarkLabel(a2cDitherEndLabel);
        }

        /// <summary>
        /// Gets a copy of the list with all the operations emitted so far.
        /// </summary>
        public Operation[] GetOperations()
        {
            return _operations.ToArray();
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs new file mode 100644 index 00000000..93748249 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -0,0 +1,819 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Extension methods on <see cref="EmitterContext"/> that append one IR instruction each
    /// by forwarding to <c>context.Add</c>.
    /// Helpers that produce a value pass a fresh <c>Local()</c> as the destination operand.
    /// Integer helpers use the bare <see cref="Instruction"/> value, while floating-point helpers
    /// combine it with an <c>fpType</c> flag (<see cref="Instruction.FP32"/> unless stated otherwise).
    /// </summary>
    static class EmitterContextInsts
    {
        public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicAdd, storageKind, Local(), a, b, c);

        public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicAnd, storageKind, Local(), a, b, c);

        public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c, Operand d)
            => context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), a, b, c, d);

        public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicMaxS32, storageKind, Local(), a, b, c);

        public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicMaxU32, storageKind, Local(), a, b, c);

        public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicMinS32, storageKind, Local(), a, b, c);

        public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicMinU32, storageKind, Local(), a, b, c);

        public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicOr, storageKind, Local(), a, b, c);

        public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicSwap, storageKind, Local(), a, b, c);

        public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
            => context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c);

        public static Operand Ballot(this EmitterContext context, Operand a)
            => context.Add(Instruction.Ballot, Local(), a);

        public static Operand Barrier(this EmitterContext context)
            => context.Add(Instruction.Barrier);

        public static Operand BitCount(this EmitterContext context, Operand a)
            => context.Add(Instruction.BitCount, Local(), a);

        public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c);

        public static Operand BitfieldExtractU32(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.BitfieldExtractU32, Local(), a, b, c);

        public static Operand BitfieldInsert(this EmitterContext context, Operand a, Operand b, Operand c, Operand d)
            => context.Add(Instruction.BitfieldInsert, Local(), a, b, c, d);

        public static Operand BitfieldReverse(this EmitterContext context, Operand a)
            => context.Add(Instruction.BitfieldReverse, Local(), a);

        public static Operand BitwiseAnd(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.BitwiseAnd, Local(), a, b);

        public static Operand BitwiseExclusiveOr(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.BitwiseExclusiveOr, Local(), a, b);

        // Conditional form: emits the NOT only when requested, otherwise hands back "a" untouched.
        public static Operand BitwiseNot(this EmitterContext context, Operand a, bool invert)
            => invert ? context.BitwiseNot(a) : a;

        public static Operand BitwiseNot(this EmitterContext context, Operand a)
            => context.Add(Instruction.BitwiseNot, Local(), a);

        public static Operand BitwiseOr(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.BitwiseOr, Local(), a, b);

        public static Operand Branch(this EmitterContext context, Operand d)
            => context.Add(Instruction.Branch, d);

        public static Operand BranchIfFalse(this EmitterContext context, Operand d, Operand a)
            => context.Add(Instruction.BranchIfFalse, d, a);

        public static Operand BranchIfTrue(this EmitterContext context, Operand d, Operand a)
            => context.Add(Instruction.BranchIfTrue, d, a);

        /// <summary>
        /// Emits a call. The function id is passed as the first source operand (as a constant),
        /// followed by <paramref name="args"/>. A destination local is only supplied when
        /// <paramref name="returns"/> is true.
        /// </summary>
        public static Operand Call(this EmitterContext context, int funcId, bool returns, params Operand[] args)
        {
            Operand[] callSources = new Operand[args.Length + 1];

            callSources[0] = Const(funcId);
            Array.Copy(args, 0, callSources, 1, args.Length);

            return context.Add(Instruction.Call, returns ? Local() : null, callSources);
        }

        public static Operand ConditionalSelect(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ConditionalSelect, Local(), a, b, c);

        public static Operand Copy(this EmitterContext context, Operand a)
            => context.Add(Instruction.Copy, Local(), a);

        public static void Copy(this EmitterContext context, Operand d, Operand a)
        {
            // Copies into a constant destination are dropped: no instruction is emitted.
            if (d.Type != OperandType.Constant)
            {
                context.Add(Instruction.Copy, d, a);
            }
        }

        public static Operand Discard(this EmitterContext context)
            => context.Add(Instruction.Discard);

        public static Operand EmitVertex(this EmitterContext context)
            => context.Add(Instruction.EmitVertex);

        public static Operand EndPrimitive(this EmitterContext context)
            => context.Add(Instruction.EndPrimitive);

        public static Operand FindLSB(this EmitterContext context, Operand a)
            => context.Add(Instruction.FindLSB, Local(), a);

        public static Operand FindMSBS32(this EmitterContext context, Operand a)
            => context.Add(Instruction.FindMSBS32, Local(), a);

        public static Operand FindMSBU32(this EmitterContext context, Operand a)
            => context.Add(Instruction.FindMSBU32, Local(), a);

        public static Operand FP32ConvertToFP64(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP32ToFP64, Local(), a);

        public static Operand FP64ConvertToFP32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP64ToFP32, Local(), a);

        // Applies the optional absolute value first, then the optional negation.
        public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg, Instruction fpType = Instruction.FP32)
        {
            Operand magnitude = context.FPAbsolute(a, abs, fpType);

            return context.FPNegate(magnitude, neg, fpType);
        }

        public static Operand FPAbsolute(this EmitterContext context, Operand a, bool abs, Instruction fpType = Instruction.FP32)
            => abs ? context.FPAbsolute(a, fpType) : a;

        public static Operand FPAbsolute(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Absolute, Local(), a);

        public static Operand FPAdd(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Add, Local(), a, b);

        public static Operand FPCeiling(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Ceiling, Local(), a);

        public static Operand FPCompareEqual(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.CompareEqual, Local(), a, b);

        public static Operand FPCompareLess(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.CompareLess, Local(), a, b);

        public static Operand FP32ConvertToS32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP32ToS32, Local(), a);

        public static Operand FP32ConvertToU32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP32ToU32, Local(), a);

        public static Operand FP64ConvertToS32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP64ToS32, Local(), a);

        public static Operand FP64ConvertToU32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP64ToU32, Local(), a);

        public static Operand FPCosine(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.Cosine, Local(), a);

        public static Operand FPDivide(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Divide, Local(), a, b);

        public static Operand FPExponentB2(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.ExponentB2, Local(), a);

        public static Operand FPFloor(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Floor, Local(), a);

        public static Operand FPFusedMultiplyAdd(this EmitterContext context, Operand a, Operand b, Operand c, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.FusedMultiplyAdd, Local(), a, b, c);

        public static Operand FPLogarithmB2(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.LogarithmB2, Local(), a);

        public static Operand FPMaximum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Maximum, Local(), a, b);

        public static Operand FPMinimum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Minimum, Local(), a, b);

        public static Operand FPMultiply(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Multiply, Local(), a, b);

        public static Operand FPNegate(this EmitterContext context, Operand a, bool neg, Instruction fpType = Instruction.FP32)
            => neg ? context.FPNegate(a, fpType) : a;

        public static Operand FPNegate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Negate, Local(), a);

        // Implemented as 1 / a; the constant one is built as a packed double for FP64.
        public static Operand FPReciprocal(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
        {
            Operand one = fpType == Instruction.FP64 ? context.PackDouble2x32(1.0) : ConstF(1);

            return context.FPDivide(one, a, fpType);
        }

        public static Operand FPReciprocalSquareRoot(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.ReciprocalSquareRoot, Local(), a);

        public static Operand FPRound(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Round, Local(), a);

        public static Operand FPSaturate(this EmitterContext context, Operand a, bool sat, Instruction fpType = Instruction.FP32)
            => sat ? context.FPSaturate(a, fpType) : a;

        // Saturation is a clamp to [0, 1]; FP64 needs its bounds as packed doubles.
        public static Operand FPSaturate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
        {
            if (fpType == Instruction.FP64)
            {
                return context.Add(fpType | Instruction.Clamp, Local(), a, context.PackDouble2x32(0.0), context.PackDouble2x32(1.0));
            }

            return context.Add(fpType | Instruction.Clamp, Local(), a, ConstF(0), ConstF(1));
        }

        public static Operand FPSine(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.Sine, Local(), a);

        public static Operand FPSquareRoot(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.SquareRoot, Local(), a);

        public static Operand FPTruncate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Truncate, Local(), a);

        public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask)
            => context.Add(Instruction.SwizzleAdd, Local(), a, b, Const(mask));

        public static void FSIBegin(this EmitterContext context)
            => context.Add(Instruction.FSIBegin);

        public static void FSIEnd(this EmitterContext context)
            => context.Add(Instruction.FSIEnd);

        public static Operand GroupMemoryBarrier(this EmitterContext context)
            => context.Add(Instruction.GroupMemoryBarrier);

        // Applies the optional absolute value first, then the optional negation.
        public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
        {
            Operand magnitude = context.IAbsolute(a, abs);

            return context.INegate(magnitude, neg);
        }

        public static Operand IAbsolute(this EmitterContext context, Operand a, bool abs)
            => abs ? context.IAbsolute(a) : a;

        public static Operand IAbsolute(this EmitterContext context, Operand a)
            => context.Add(Instruction.Absolute, Local(), a);

        public static Operand IAdd(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Add, Local(), a, b);

        public static Operand IClampS32(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.Clamp, Local(), a, b, c);

        public static Operand IClampU32(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ClampU32, Local(), a, b, c);

        public static Operand ICompareEqual(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareEqual, Local(), a, b);

        public static Operand ICompareGreater(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareGreater, Local(), a, b);

        public static Operand ICompareGreaterOrEqual(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareGreaterOrEqual, Local(), a, b);

        public static Operand ICompareGreaterOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareGreaterOrEqualU32, Local(), a, b);

        public static Operand ICompareGreaterUnsigned(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareGreaterU32, Local(), a, b);

        public static Operand ICompareLess(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareLess, Local(), a, b);

        public static Operand ICompareLessOrEqual(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareLessOrEqual, Local(), a, b);

        public static Operand ICompareLessOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareLessOrEqualU32, Local(), a, b);

        public static Operand ICompareLessUnsigned(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareLessU32, Local(), a, b);

        public static Operand ICompareNotEqual(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareNotEqual, Local(), a, b);

        public static Operand IConvertS32ToFP32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertS32ToFP32, Local(), a);

        public static Operand IConvertS32ToFP64(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertS32ToFP64, Local(), a);

        public static Operand IConvertU32ToFP32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertU32ToFP32, Local(), a);

        public static Operand IConvertU32ToFP64(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertU32ToFP64, Local(), a);

        public static Operand IMaximumS32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Maximum, Local(), a, b);

        public static Operand IMaximumU32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.MaximumU32, Local(), a, b);

        public static Operand IMinimumS32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Minimum, Local(), a, b);

        public static Operand IMinimumU32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.MinimumU32, Local(), a, b);

        public static Operand IMultiply(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Multiply, Local(), a, b);

        public static Operand INegate(this EmitterContext context, Operand a, bool neg)
            => neg ? context.INegate(a) : a;

        public static Operand INegate(this EmitterContext context, Operand a)
            => context.Add(Instruction.Negate, Local(), a);

        public static Operand ISubtract(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Subtract, Local(), a, b);

        public static Operand IsNan(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.IsNan, Local(), a);

        // Load overloads: the IO variable is passed as an integer constant. When "primVertex" is
        // supplied it is inserted right after that constant, ahead of any index operands.
        public static Operand Load(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand primVertex = null)
        {
            if (primVertex != null)
            {
                return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex);
            }

            return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable));
        }

        public static Operand Load(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand primVertex,
            Operand elemIndex)
        {
            if (primVertex != null)
            {
                return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex, elemIndex);
            }

            return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), elemIndex);
        }

        public static Operand Load(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand primVertex,
            Operand arrayIndex,
            Operand elemIndex)
        {
            if (primVertex != null)
            {
                return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex, arrayIndex, elemIndex);
            }

            return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), arrayIndex, elemIndex);
        }

        /// <summary>
        /// Emits a constant buffer load and records its usage on the shader config:
        /// a constant <paramref name="a"/> marks that buffer as used, anything else
        /// sets the <see cref="FeatureFlags.CbIndexing"/> feature.
        /// </summary>
        public static Operand LoadConstant(this EmitterContext context, Operand a, Operand b)
        {
            if (a.Type != OperandType.Constant)
            {
                context.Config.SetUsedFeature(FeatureFlags.CbIndexing);
            }
            else
            {
                context.Config.SetUsedConstantBuffer(a.Value);
            }

            return context.Add(Instruction.LoadConstant, Local(), a, b);
        }

        public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.LoadGlobal, Local(), a, b);

        public static Operand LoadLocal(this EmitterContext context, Operand a)
            => context.Add(Instruction.LoadLocal, Local(), a);

        public static Operand LoadShared(this EmitterContext context, Operand a)
            => context.Add(Instruction.LoadShared, Local(), a);

        public static Operand MemoryBarrier(this EmitterContext context)
            => context.Add(Instruction.MemoryBarrier);

        public static Operand MultiplyHighS32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.MultiplyHighS32, Local(), a, b);

        public static Operand MultiplyHighU32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.MultiplyHighU32, Local(), a, b);

        // Splits the 64-bit pattern of "value" into two 32-bit constants: low word first, high word second.
        public static Operand PackDouble2x32(this EmitterContext context, double value)
        {
            long bits = BitConverter.DoubleToInt64Bits(value);

            return context.Add(Instruction.PackDouble2x32, Local(), Const((int)bits), Const((int)(bits >> 32)));
        }

        public static Operand PackDouble2x32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.PackDouble2x32, Local(), a, b);

        public static Operand PackHalf2x16(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.PackHalf2x16, Local(), a, b);

        // Both Return overloads call PrepareForReturn on the context before emitting the instruction.
        public static void Return(this EmitterContext context)
        {
            context.PrepareForReturn();
            context.Add(Instruction.Return);
        }

        public static void Return(this EmitterContext context, Operand returnValue)
        {
            context.PrepareForReturn();
            context.Add(Instruction.Return, null, returnValue);
        }

        public static Operand ShiftLeft(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.ShiftLeft, Local(), a, b);

        public static Operand ShiftRightS32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.ShiftRightS32, Local(), a, b);

        public static Operand ShiftRightU32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.ShiftRightU32, Local(), a, b);

        // The shuffle family supplies a pair of destination locals and returns a pair of operands.
        public static (Operand, Operand) Shuffle(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.Shuffle, (Local(), Local()), a, b, c);

        public static (Operand, Operand) ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ShuffleDown, (Local(), Local()), a, b, c);

        public static (Operand, Operand) ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ShuffleUp, (Local(), Local()), a, b, c);

        public static (Operand, Operand) ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);

        // Store overloads: no destination operand is passed. When "invocationId" is supplied it is
        // inserted right after the IO variable constant; the stored value is always the last source.
        public static Operand Store(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand invocationId,
            Operand value)
        {
            if (invocationId != null)
            {
                return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, value);
            }

            return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), value);
        }

        public static Operand Store(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand invocationId,
            Operand elemIndex,
            Operand value)
        {
            if (invocationId != null)
            {
                return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, elemIndex, value);
            }

            return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), elemIndex, value);
        }

        public static Operand Store(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand invocationId,
            Operand arrayIndex,
            Operand elemIndex,
            Operand value)
        {
            if (invocationId != null)
            {
                return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, arrayIndex, elemIndex, value);
            }

            return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value);
        }

        public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.StoreGlobal, null, a, b, c);

        public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.StoreGlobal16, null, a, b, c);

        public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.StoreGlobal8, null, a, b, c);

        public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.StoreLocal, null, a, b);

        public static Operand StoreShared(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.StoreShared, null, a, b);

        public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.StoreShared16, null, a, b);

        public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.StoreShared8, null, a, b);

        // Unpack helpers: the High variants use index 1, the Low variants index 0.
        public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
            => context.UnpackDouble2x32(a, 1);

        public static Operand UnpackDouble2x32Low(this EmitterContext context, Operand a)
            => context.UnpackDouble2x32(a, 0);

        private static Operand UnpackDouble2x32(this EmitterContext context, Operand a, int index)
        {
            Operand result = Local();

            context.Add(new Operation(Instruction.UnpackDouble2x32, index, result, a));

            return result;
        }

        public static Operand UnpackHalf2x16High(this EmitterContext context, Operand a)
            => context.UnpackHalf2x16(a, 1);

        public static Operand UnpackHalf2x16Low(this EmitterContext context, Operand a)
            => context.UnpackHalf2x16(a, 0);

        private static Operand UnpackHalf2x16(this EmitterContext context, Operand a, int index)
        {
            Operand result = Local();

            context.Add(new Operation(Instruction.UnpackHalf2x16, index, result, a));

            return result;
        }

        public static Operand VoteAll(this EmitterContext context, Operand a)
            => context.Add(Instruction.VoteAll, Local(), a);

        public static Operand VoteAllEqual(this EmitterContext context, Operand a)
            => context.Add(Instruction.VoteAllEqual, Local(), a);

        public static Operand VoteAny(this EmitterContext context, Operand a)
            => context.Add(Instruction.VoteAny, Local(), a);
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs new file mode 100644 index 00000000..c035f212 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -0,0 +1,27 @@ +using System; + +namespace Ryujinx.Graphics.Shader.Translation +{ + /// <summary> + /// Features used by the shader that are important for the code generator to know in advance. + /// These typically change the declarations in the shader header. + /// </summary> + [Flags] + public enum FeatureFlags + { + None = 0, + + // Affected by resolution scaling. + IntegerSampling = 1 << 0, + FragCoordXY = 1 << 1, + + Bindless = 1 << 2, + InstanceId = 1 << 3, + DrawParameters = 1 << 4, + RtLayer = 1 << 5, + CbIndexing = 1 << 6, + IaIndexing = 1 << 7, + OaIndexing = 1 << 8, + FixedFuncAttr = 1 << 9 + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs b/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs new file mode 100644 index 00000000..073e120a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs @@ -0,0 +1,866 @@ +using Ryujinx.Graphics.Shader.Decoders; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Shader.Translation +{ + static class FunctionMatch + { + private static IPatternTreeNode[] _fsiGetAddressTree = PatternTrees.GetFsiGetAddress(); + private static IPatternTreeNode[] _fsiGetAddressV2Tree = PatternTrees.GetFsiGetAddressV2(); + private static IPatternTreeNode[] _fsiIsLastWarpThreadPatternTree = PatternTrees.GetFsiIsLastWarpThread(); + private static IPatternTreeNode[] _fsiBeginPatternTree = PatternTrees.GetFsiBeginPattern(); + private static IPatternTreeNode[] _fsiEndPatternTree = PatternTrees.GetFsiEndPattern(); + + public static void RunPass(DecodedProgram program) + { + byte[] externalRegs = new byte[4]; + bool hasGetAddress = false; + + foreach 
(DecodedFunction function in program) + { + if (function == program.MainFunction) + { + continue; + } + + int externalReg4 = 0; + + TreeNode[] functionTree = BuildTree(function.Blocks); + + if (Matches(_fsiGetAddressTree, functionTree)) + { + externalRegs[1] = functionTree[0].GetRd(); + externalRegs[2] = functionTree[2].GetRd(); + externalRegs[3] = functionTree[1].GetRd(); + externalReg4 = functionTree[3].GetRd(); + } + else if (Matches(_fsiGetAddressV2Tree, functionTree)) + { + externalRegs[1] = functionTree[2].GetRd(); + externalRegs[2] = functionTree[1].GetRd(); + externalRegs[3] = functionTree[0].GetRd(); + externalReg4 = functionTree[3].GetRd(); + } + + // Ensure the register allocation is valid. + // If so, then we have a match. + if (externalRegs[1] != externalRegs[2] && + externalRegs[2] != externalRegs[3] && + externalRegs[1] != externalRegs[3] && + externalRegs[1] + 1 != externalRegs[2] && + externalRegs[1] + 1 != externalRegs[3] && + externalRegs[1] + 1 == externalReg4 && + externalRegs[2] != RegisterConsts.RegisterZeroIndex && + externalRegs[3] != RegisterConsts.RegisterZeroIndex && + externalReg4 != RegisterConsts.RegisterZeroIndex) + { + hasGetAddress = true; + function.Type = FunctionType.Unused; + break; + } + } + + foreach (DecodedFunction function in program) + { + if (function.IsCompilerGenerated || function == program.MainFunction) + { + continue; + } + + if (hasGetAddress) + { + TreeNode[] functionTree = BuildTree(function.Blocks); + + if (MatchesFsi(_fsiBeginPatternTree, program, function, functionTree, externalRegs)) + { + function.Type = FunctionType.BuiltInFSIBegin; + continue; + } + else if (MatchesFsi(_fsiEndPatternTree, program, function, functionTree, externalRegs)) + { + function.Type = FunctionType.BuiltInFSIEnd; + continue; + } + } + } + } + + private readonly struct TreeNodeUse + { + public TreeNode Node { get; } + public int Index { get; } + public bool Inverted { get; } + + private TreeNodeUse(int index, bool inverted, TreeNode 
node) + { + Index = index; + Inverted = inverted; + Node = node; + } + + public TreeNodeUse(int index, TreeNode node) : this(index, false, node) + { + } + + public TreeNodeUse Flip() + { + return new TreeNodeUse(Index, !Inverted, Node); + } + } + + private enum TreeNodeType : byte + { + Op, + Label + } + + private class TreeNode + { + public readonly InstOp Op; + public readonly List<TreeNodeUse> Uses; + public TreeNodeType Type { get; } + public byte Order { get; } + + public TreeNode(byte order) + { + Type = TreeNodeType.Label; + Order = order; + } + + public TreeNode(InstOp op, byte order) + { + Op = op; + Uses = new List<TreeNodeUse>(); + Type = TreeNodeType.Op; + Order = order; + } + + public byte GetPd() + { + return (byte)((Op.RawOpCode >> 3) & 7); + } + + public byte GetRd() + { + return (byte)Op.RawOpCode; + } + } + + private static TreeNode[] BuildTree(Block[] blocks) + { + List<TreeNode> nodes = new List<TreeNode>(); + + Dictionary<ulong, TreeNode> labels = new Dictionary<ulong, TreeNode>(); + + TreeNodeUse[] predDefs = new TreeNodeUse[RegisterConsts.PredsCount]; + TreeNodeUse[] gprDefs = new TreeNodeUse[RegisterConsts.GprsCount]; + + void DefPred(byte predIndex, int index, TreeNode node) + { + if (predIndex != RegisterConsts.PredicateTrueIndex) + { + predDefs[predIndex] = new TreeNodeUse(index, node); + } + } + + void DefGpr(byte regIndex, int index, TreeNode node) + { + if (regIndex != RegisterConsts.RegisterZeroIndex) + { + gprDefs[regIndex] = new TreeNodeUse(index, node); + } + } + + TreeNodeUse UsePred(byte predIndex, bool predInv) + { + if (predIndex != RegisterConsts.PredicateTrueIndex) + { + TreeNodeUse use = predDefs[predIndex]; + + if (use.Node != null) + { + nodes.Remove(use.Node); + } + else + { + use = new TreeNodeUse(-(predIndex + 2), null); + } + + return predInv ? 
use.Flip() : use; + } + + return new TreeNodeUse(-1, null); + } + + TreeNodeUse UseGpr(byte regIndex) + { + if (regIndex != RegisterConsts.RegisterZeroIndex) + { + TreeNodeUse use = gprDefs[regIndex]; + + if (use.Node != null) + { + nodes.Remove(use.Node); + } + else + { + use = new TreeNodeUse(-(regIndex + 2), null); + } + + return use; + } + + return new TreeNodeUse(-1, null); + } + + byte order = 0; + + for (int index = 0; index < blocks.Length; index++) + { + Block block = blocks[index]; + + if (block.Predecessors.Count > 1) + { + TreeNode label = new TreeNode(order++); + nodes.Add(label); + labels.Add(block.Address, label); + } + + for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++) + { + InstOp op = block.OpCodes[opIndex]; + + TreeNode node = new TreeNode(op, IsOrderDependant(op.Name) ? order : (byte)0); + + // Add uses. + + if (!op.Props.HasFlag(InstProps.NoPred)) + { + byte predIndex = (byte)((op.RawOpCode >> 16) & 7); + bool predInv = (op.RawOpCode & 0x80000) != 0; + node.Uses.Add(UsePred(predIndex, predInv)); + } + + if (op.Props.HasFlag(InstProps.Ps)) + { + byte predIndex = (byte)((op.RawOpCode >> 39) & 7); + bool predInv = (op.RawOpCode & 0x40000000000) != 0; + node.Uses.Add(UsePred(predIndex, predInv)); + } + + if (op.Props.HasFlag(InstProps.Ra)) + { + byte ra = (byte)(op.RawOpCode >> 8); + node.Uses.Add(UseGpr(ra)); + } + + if ((op.Props & (InstProps.Rb | InstProps.Rb2)) != 0) + { + byte rb = op.Props.HasFlag(InstProps.Rb2) ? (byte)op.RawOpCode : (byte)(op.RawOpCode >> 20); + node.Uses.Add(UseGpr(rb)); + } + + if (op.Props.HasFlag(InstProps.Rc)) + { + byte rc = (byte)(op.RawOpCode >> 39); + node.Uses.Add(UseGpr(rc)); + } + + if (op.Name == InstName.Bra && labels.TryGetValue(op.GetAbsoluteAddress(), out TreeNode label)) + { + node.Uses.Add(new TreeNodeUse(0, label)); + } + + // Make definitions. 
+ + int defIndex = 0; + + InstProps pdType = op.Props & InstProps.PdMask; + + if (pdType != 0) + { + int bit = pdType switch + { + InstProps.Pd => 3, + InstProps.LPd => 48, + InstProps.SPd => 30, + InstProps.TPd => 51, + InstProps.VPd => 45, + _ => throw new InvalidOperationException($"Table has unknown predicate destination {pdType}.") + }; + + byte predIndex = (byte)((op.RawOpCode >> bit) & 7); + DefPred(predIndex, defIndex++, node); + } + + if (op.Props.HasFlag(InstProps.Rd)) + { + byte rd = (byte)op.RawOpCode; + DefGpr(rd, defIndex++, node); + } + + nodes.Add(node); + } + } + + return nodes.ToArray(); + } + + private static bool IsOrderDependant(InstName name) + { + switch (name) + { + case InstName.Atom: + case InstName.AtomCas: + case InstName.Atoms: + case InstName.AtomsCas: + case InstName.Ld: + case InstName.Ldg: + case InstName.Ldl: + case InstName.Lds: + case InstName.Suatom: + case InstName.SuatomB: + case InstName.SuatomB2: + case InstName.SuatomCas: + case InstName.SuatomCasB: + case InstName.Suld: + case InstName.SuldB: + case InstName.SuldD: + case InstName.SuldDB: + return true; + } + + return false; + } + + private interface IPatternTreeNode + { + List<PatternTreeNodeUse> Uses { get; } + InstName Name { get; } + TreeNodeType Type { get; } + byte Order { get; } + bool IsImm { get; } + bool Matches(in InstOp opInfo); + } + + private readonly struct PatternTreeNodeUse + { + public IPatternTreeNode Node { get; } + public int Index { get; } + public bool Inverted { get; } + public PatternTreeNodeUse Inv => new PatternTreeNodeUse(Index, !Inverted, Node); + + private PatternTreeNodeUse(int index, bool inverted, IPatternTreeNode node) + { + Index = index; + Inverted = inverted; + Node = node; + } + + public PatternTreeNodeUse(int index, IPatternTreeNode node) : this(index, false, node) + { + } + } + + private class PatternTreeNode<T> : IPatternTreeNode + { + public List<PatternTreeNodeUse> Uses { get; } + private readonly Func<T, bool> _match; + + public 
InstName Name { get; } + public TreeNodeType Type { get; } + public byte Order { get; } + public bool IsImm { get; } + public PatternTreeNodeUse Out => new PatternTreeNodeUse(0, this); + + public PatternTreeNode(InstName name, Func<T, bool> match, TreeNodeType type = TreeNodeType.Op, byte order = 0, bool isImm = false) + { + Name = name; + _match = match; + Type = type; + Order = order; + IsImm = isImm; + Uses = new List<PatternTreeNodeUse>(); + } + + public PatternTreeNode<T> Use(PatternTreeNodeUse use) + { + Uses.Add(use); + return this; + } + + public PatternTreeNodeUse OutAt(int index) + { + return new PatternTreeNodeUse(index, this); + } + + public bool Matches(in InstOp opInfo) + { + if (opInfo.Name != Name) + { + return false; + } + + ulong rawOp = opInfo.RawOpCode; + T op = Unsafe.As<ulong, T>(ref rawOp); + + if (!_match(op)) + { + return false; + } + + return true; + } + } + + private static bool MatchesFsi( + IPatternTreeNode[] pattern, + DecodedProgram program, + DecodedFunction function, + TreeNode[] functionTree, + byte[] externalRegs) + { + if (function.Blocks.Length == 0) + { + return false; + } + + InstOp callOp = function.Blocks[0].GetLastOp(); + + if (callOp.Name != InstName.Cal) + { + return false; + } + + DecodedFunction callTarget = program.GetFunctionByAddress(callOp.GetAbsoluteAddress()); + TreeNode[] callTargetTree = null; + + if (callTarget == null || !Matches(_fsiIsLastWarpThreadPatternTree, callTargetTree = BuildTree(callTarget.Blocks))) + { + return false; + } + + externalRegs[0] = callTargetTree[0].GetPd(); + + if (Matches(pattern, functionTree, externalRegs)) + { + callTarget.RemoveCaller(function); + return true; + } + + return false; + } + + private static bool Matches(IPatternTreeNode[] pTree, TreeNode[] cTree, byte[] externalRegs = null) + { + if (pTree.Length != cTree.Length) + { + return false; + } + + for (int index = 0; index < pTree.Length; index++) + { + if (!Matches(pTree[index], cTree[index], externalRegs)) + { + return 
false; + } + } + + return true; + } + + private static bool Matches(IPatternTreeNode pTreeNode, TreeNode cTreeNode, byte[] externalRegs) + { + if (!pTreeNode.Matches(in cTreeNode.Op) || + pTreeNode.Type != cTreeNode.Type || + pTreeNode.Order != cTreeNode.Order || + pTreeNode.IsImm != cTreeNode.Op.Props.HasFlag(InstProps.Ib)) + { + return false; + } + + if (pTreeNode.Type == TreeNodeType.Op) + { + if (pTreeNode.Uses.Count != cTreeNode.Uses.Count) + { + return false; + } + + for (int index = 0; index < pTreeNode.Uses.Count; index++) + { + var pUse = pTreeNode.Uses[index]; + var cUse = cTreeNode.Uses[index]; + + if (pUse.Index <= -2) + { + if (externalRegs[-pUse.Index - 2] != (-cUse.Index - 2)) + { + return false; + } + } + else if (pUse.Index != cUse.Index) + { + return false; + } + + if (pUse.Inverted != cUse.Inverted || (pUse.Node == null) != (cUse.Node == null)) + { + return false; + } + + if (pUse.Node != null && !Matches(pUse.Node, cUse.Node, externalRegs)) + { + return false; + } + } + } + + return true; + } + + private static class PatternTrees + { + public static IPatternTreeNode[] GetFsiGetAddress() + { + var affinityValue = S2r(SReg.Affinity).Use(PT).Out; + var orderingTicketValue = S2r(SReg.OrderingTicket).Use(PT).Out; + + return new IPatternTreeNode[] + { + Iscadd(cc: true, 2, 0, 404) + .Use(PT) + .Use(Iscadd(cc: false, 8) + .Use(PT) + .Use(Lop32i(LogicOp.And, 0xff) + .Use(PT) + .Use(affinityValue).Out) + .Use(Lop32i(LogicOp.And, 0xff) + .Use(PT) + .Use(orderingTicketValue).Out).Out), + ShrU32W(16) + .Use(PT) + .Use(orderingTicketValue), + Iadd32i(0x200) + .Use(PT) + .Use(Lop32i(LogicOp.And, 0xfe00) + .Use(PT) + .Use(orderingTicketValue).Out), + Iadd(x: true, 0, 405).Use(PT).Use(RZ), + Ret().Use(PT) + }; + } + + public static IPatternTreeNode[] GetFsiGetAddressV2() + { + var affinityValue = S2r(SReg.Affinity).Use(PT).Out; + var orderingTicketValue = S2r(SReg.OrderingTicket).Use(PT).Out; + + return new IPatternTreeNode[] + { + ShrU32W(16) + .Use(PT) + 
.Use(orderingTicketValue), + Iadd32i(0x200) + .Use(PT) + .Use(Lop32i(LogicOp.And, 0xfe00) + .Use(PT) + .Use(orderingTicketValue).Out), + Iscadd(cc: true, 2, 0, 404) + .Use(PT) + .Use(Bfi(0x808) + .Use(PT) + .Use(affinityValue) + .Use(Lop32i(LogicOp.And, 0xff) + .Use(PT) + .Use(orderingTicketValue).Out).Out), + Iadd(x: true, 0, 405).Use(PT).Use(RZ), + Ret().Use(PT) + }; + } + + public static IPatternTreeNode[] GetFsiIsLastWarpThread() + { + var threadKillValue = S2r(SReg.ThreadKill).Use(PT).Out; + var laneIdValue = S2r(SReg.LaneId).Use(PT).Out; + + return new IPatternTreeNode[] + { + IsetpU32(IComp.Eq) + .Use(PT) + .Use(PT) + .Use(FloU32() + .Use(PT) + .Use(Vote(VoteMode.Any) + .Use(PT) + .Use(IsetpU32(IComp.Ne) + .Use(PT) + .Use(PT) + .Use(Lop(negB: true, LogicOp.PassB) + .Use(PT) + .Use(RZ) + .Use(threadKillValue).OutAt(1)) + .Use(RZ).Out).OutAt(1)).Out) + .Use(laneIdValue), + Ret().Use(PT) + }; + } + + public static IPatternTreeNode[] GetFsiBeginPattern() + { + var addressLowValue = CallArg(1); + + static PatternTreeNodeUse HighU16Equals(PatternTreeNodeUse x) + { + var expectedValue = CallArg(3); + + return IsetpU32(IComp.Eq) + .Use(PT) + .Use(PT) + .Use(ShrU32W(16).Use(PT).Use(x).Out) + .Use(expectedValue).Out; + } + + PatternTreeNode<byte> label; + + return new IPatternTreeNode[] + { + Cal(), + Ret().Use(CallArg(0).Inv), + Ret() + .Use(HighU16Equals(LdgE(CacheOpLd.Cg, LsSize.B32) + .Use(PT) + .Use(addressLowValue).Out)), + label = Label(), + Bra() + .Use(HighU16Equals(LdgE(CacheOpLd.Cg, LsSize.B32, 1) + .Use(PT) + .Use(addressLowValue).Out).Inv) + .Use(label.Out), + Ret().Use(PT) + }; + } + + public static IPatternTreeNode[] GetFsiEndPattern() + { + var voteResult = Vote(VoteMode.All).Use(PT).Use(PT).OutAt(1); + var popcResult = Popc().Use(PT).Use(voteResult).Out; + var threadKillValue = S2r(SReg.ThreadKill).Use(PT).Out; + var laneIdValue = S2r(SReg.LaneId).Use(PT).Out; + + var addressLowValue = CallArg(1); + var incrementValue = CallArg(2); + + return new 
IPatternTreeNode[] + { + Cal(), + Ret().Use(CallArg(0).Inv), + Membar(Decoders.Membar.Vc).Use(PT), + Ret().Use(IsetpU32(IComp.Ne) + .Use(PT) + .Use(PT) + .Use(threadKillValue) + .Use(RZ).Out), + RedE(RedOp.Add, AtomSize.U32) + .Use(IsetpU32(IComp.Eq) + .Use(PT) + .Use(PT) + .Use(FloU32() + .Use(PT) + .Use(voteResult).Out) + .Use(laneIdValue).Out) + .Use(addressLowValue) + .Use(Xmad(XmadCop.Cbcc, psl: true, hiloA: true, hiloB: true) + .Use(PT) + .Use(incrementValue) + .Use(Xmad(XmadCop.Cfull, mrg: true, hiloB: true) + .Use(PT) + .Use(incrementValue) + .Use(popcResult) + .Use(RZ).Out) + .Use(Xmad(XmadCop.Cfull) + .Use(PT) + .Use(incrementValue) + .Use(popcResult) + .Use(RZ).Out).Out), + Ret().Use(PT) + }; + } + + private static PatternTreeNode<InstBfiI> Bfi(int imm) + { + return new(InstName.Bfi, (op) => !op.WriteCC && op.Imm20 == imm, isImm: true); + } + + private static PatternTreeNode<InstBra> Bra() + { + return new(InstName.Bra, (op) => op.Ccc == Ccc.T && !op.Ca); + } + + private static PatternTreeNode<InstCal> Cal() + { + return new(InstName.Cal, (op) => !op.Ca && op.Inc); + } + + private static PatternTreeNode<InstFloR> FloU32() + { + return new(InstName.Flo, (op) => !op.Signed && !op.Sh && !op.NegB && !op.WriteCC); + } + + private static PatternTreeNode<InstIaddC> Iadd(bool x, int cbufSlot, int cbufOffset) + { + return new(InstName.Iadd, (op) => + !op.Sat && + !op.WriteCC && + op.X == x && + op.AvgMode == AvgMode.NoNeg && + op.CbufSlot == cbufSlot && + op.CbufOffset == cbufOffset); + } + + private static PatternTreeNode<InstIadd32i> Iadd32i(int imm) + { + return new(InstName.Iadd32i, (op) => !op.Sat && !op.WriteCC && !op.X && op.AvgMode == AvgMode.NoNeg && op.Imm32 == imm); + } + + private static PatternTreeNode<InstIscaddR> Iscadd(bool cc, int imm) + { + return new(InstName.Iscadd, (op) => op.WriteCC == cc && op.AvgMode == AvgMode.NoNeg && op.Imm5 == imm); + } + + private static PatternTreeNode<InstIscaddC> Iscadd(bool cc, int imm, int cbufSlot, int 
cbufOffset) + { + return new(InstName.Iscadd, (op) => + op.WriteCC == cc && + op.AvgMode == AvgMode.NoNeg && + op.Imm5 == imm && + op.CbufSlot == cbufSlot && + op.CbufOffset == cbufOffset); + } + + private static PatternTreeNode<InstIsetpR> IsetpU32(IComp comp) + { + return new(InstName.Isetp, (op) => !op.Signed && op.IComp == comp && op.Bop == BoolOp.And); + } + + private static PatternTreeNode<byte> Label() + { + return new(InstName.Invalid, (op) => true, type: TreeNodeType.Label); + } + + private static PatternTreeNode<InstLopR> Lop(bool negB, LogicOp logicOp) + { + return new(InstName.Lop, (op) => !op.NegA && op.NegB == negB && !op.WriteCC && !op.X && op.Lop == logicOp && op.PredicateOp == PredicateOp.F); + } + + private static PatternTreeNode<InstLop32i> Lop32i(LogicOp logicOp, int imm) + { + return new(InstName.Lop32i, (op) => !op.NegA && !op.NegB && !op.X && !op.WriteCC && op.LogicOp == logicOp && op.Imm32 == imm); + } + + private static PatternTreeNode<InstMembar> Membar(Membar membar) + { + return new(InstName.Membar, (op) => op.Membar == membar); + } + + private static PatternTreeNode<InstPopcR> Popc() + { + return new(InstName.Popc, (op) => !op.NegB); + } + + private static PatternTreeNode<InstRet> Ret() + { + return new(InstName.Ret, (op) => op.Ccc == Ccc.T); + } + + private static PatternTreeNode<InstS2r> S2r(SReg reg) + { + return new(InstName.S2r, (op) => op.SReg == reg); + } + + private static PatternTreeNode<InstShrI> ShrU32W(int imm) + { + return new(InstName.Shr, (op) => !op.Signed && !op.Brev && op.M && op.XMode == 0 && op.Imm20 == imm, isImm: true); + } + + private static PatternTreeNode<InstLdg> LdgE(CacheOpLd cacheOp, LsSize size, byte order = 0) + { + return new(InstName.Ldg, (op) => op.E && op.CacheOp == cacheOp && op.LsSize == size, order: order); + } + + private static PatternTreeNode<InstRed> RedE(RedOp redOp, AtomSize size, byte order = 0) + { + return new(InstName.Red, (op) => op.E && op.RedOp == redOp && op.RedSize == size, order: 
order); + } + + private static PatternTreeNode<InstVote> Vote(VoteMode mode) + { + return new(InstName.Vote, (op) => op.VoteMode == mode); + } + + private static PatternTreeNode<InstXmadR> Xmad(XmadCop cop, bool psl = false, bool mrg = false, bool hiloA = false, bool hiloB = false) + { + return new(InstName.Xmad, (op) => op.XmadCop == cop && op.Psl == psl && op.Mrg == mrg && op.HiloA == hiloA && op.HiloB == hiloB); + } + + private static PatternTreeNodeUse PT => PTOrRZ(); + private static PatternTreeNodeUse RZ => PTOrRZ(); + private static PatternTreeNodeUse Undef => new PatternTreeNodeUse(0, null); + + private static PatternTreeNodeUse CallArg(int index) + { + return new PatternTreeNodeUse(-(index + 2), null); + } + + private static PatternTreeNodeUse PTOrRZ() + { + return new PatternTreeNodeUse(-1, null); + } + } + + private static void PrintTreeNode(TreeNode node, string indentation) + { + Console.WriteLine($" {node.Op.Name}"); + + for (int i = 0; i < node.Uses.Count; i++) + { + TreeNodeUse use = node.Uses[i]; + bool last = i == node.Uses.Count - 1; + char separator = last ? '`' : '|'; + + if (use.Node != null) + { + Console.Write($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})"); + PrintTreeNode(use.Node, indentation + (last ? " " : " | ")); + } + else + { + Console.WriteLine($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index}) NULL"); + } + } + } + + private static void PrintTreeNode(IPatternTreeNode node, string indentation) + { + Console.WriteLine($" {node.Name}"); + + for (int i = 0; i < node.Uses.Count; i++) + { + PatternTreeNodeUse use = node.Uses[i]; + bool last = i == node.Uses.Count - 1; + char separator = last ? '`' : '|'; + + if (use.Node != null) + { + Console.Write($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})"); + PrintTreeNode(use.Node, indentation + (last ? " " : " | ")); + } + else + { + Console.WriteLine($"{indentation} {separator}- ({(use.Inverted ? 
"INV " : "")}{use.Index}) NULL"); + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs new file mode 100644 index 00000000..774a128d --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs @@ -0,0 +1,52 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; + +namespace Ryujinx.Graphics.Shader.Translation +{ + static class GlobalMemory + { + private const int StorageDescsBaseOffset = 0x44; // In words. + + public const int StorageDescSize = 4; // In words. + public const int StorageMaxCount = 16; + + public const int StorageDescsSize = StorageDescSize * StorageMaxCount; + + public const int UbeBaseOffset = 0x98; // In words. + public const int UbeMaxCount = 9; + public const int UbeDescsSize = StorageDescSize * UbeMaxCount; + public const int UbeFirstCbuf = 8; + + public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind) + { + return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) || + inst == Instruction.LoadGlobal || + inst == Instruction.StoreGlobal || + inst == Instruction.StoreGlobal16 || + inst == Instruction.StoreGlobal8; + } + + public static int GetStorageCbOffset(ShaderStage stage, int slot) + { + return GetStorageBaseCbOffset(stage) + slot * StorageDescSize; + } + + public static int GetStorageBaseCbOffset(ShaderStage stage) + { + return stage switch + { + ShaderStage.Compute => StorageDescsBaseOffset + 2 * StorageDescsSize, + ShaderStage.Vertex => StorageDescsBaseOffset, + ShaderStage.TessellationControl => StorageDescsBaseOffset + 1 * StorageDescsSize, + ShaderStage.TessellationEvaluation => StorageDescsBaseOffset + 2 * StorageDescsSize, + ShaderStage.Geometry => StorageDescsBaseOffset + 3 * StorageDescsSize, + ShaderStage.Fragment => StorageDescsBaseOffset + 4 * StorageDescsSize, + _ => 0 + }; + } + + public static int GetConstantUbeOffset(int slot) + { + return UbeBaseOffset + slot * 
StorageDescSize; + } + } +}
// File: src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
using Ryujinx.Graphics.Shader.Instructions;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    // Optimization pass that rewrites bindless texture/image operations whose handle
    // can be traced back to constant buffer values, so that they carry an explicit
    // constant buffer slot/offset handle instead.
    class BindlessElimination
    {
        // Walks every operation of the block and tries to replace the handle of each
        // bindless texture or image operation.
        //
        // A bindless access can be turned into a regular one by recognizing the pattern
        // produced by the compiler for separate texture and sampler. The handle must be
        // one of:
        // - A constant buffer value.
        // - The result of a bitwise OR whose sources come from a constant buffer
        //   (optionally masked or shifted), or from a constant buffer and a constant.
        public static void RunPass(BasicBlock block, ShaderConfig config)
        {
            LinkedListNode<INode> current = block.Operations.First;

            while (current != null)
            {
                if (current.Value is TextureOperation texOp && (texOp.Flags & TextureFlags.Bindless) != 0)
                {
                    switch (texOp.Inst)
                    {
                        case Instruction.Lod:
                        case Instruction.TextureSample:
                        case Instruction.TextureSize:
                            RewriteTextureAccess(block, config, texOp);
                            break;

                        case Instruction.ImageLoad:
                        case Instruction.ImageStore:
                        case Instruction.ImageAtomic:
                            RewriteImageAccess(block, config, texOp);
                            break;
                    }
                }

                current = current.Next;
            }
        }

        // Handles Lod/TextureSample/TextureSize operations. Leaves the operation untouched
        // when the handle does not match any of the recognized patterns.
        private static void RewriteTextureAccess(BasicBlock block, ShaderConfig config, TextureOperation texOp)
        {
            Operand handle = Utils.FindLastOperation(texOp.GetSource(0), block);

            // Some instructions do not encode an accurate sampler type:
            // - Most instructions use the same type for 1D and Buffer.
            // - Query instructions may not have any type.
            // For those cases, the type is taken from the current GPU state, which is only
            // possible once the elimination succeeded and the descriptor location is known.
            bool rewriteSamplerType =
                texOp.Type == SamplerType.TextureBuffer ||
                texOp.Inst == Instruction.TextureSize;

            if (handle.Type == OperandType.ConstantBuffer)
            {
                // Simplest case: the handle is read directly from a constant buffer.
                SetHandle(config, texOp, handle.GetCbufOffset(), handle.GetCbufSlot(), rewriteSamplerType, isImage: false);
                return;
            }

            if (handle.AsgOp is not Operation combineOp || combineOp.Inst != Instruction.BitwiseOr)
            {
                return;
            }

            Operand lhs = Utils.FindLastOperation(combineOp.GetSource(0), block);
            Operand rhs = Utils.FindLastOperation(combineOp.GetSource(1), block);

            // OR is commutative, so when exactly one side is a constant it is moved to the
            // right-hand side. Having a canonical form simplifies the checks below.
            if (lhs.Type == OperandType.Constant && rhs.Type != OperandType.Constant)
            {
                (lhs, rhs) = (rhs, lhs);
            }

            TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;

            // Patterns that are matched:
            // Masked pattern:
            // - samplerHandle = samplerHandle & 0xFFF00000;
            // - textureHandle = textureHandle & 0xFFFFF;
            // - combinedHandle = samplerHandle | textureHandle;
            //   Where samplerHandle and textureHandle come from a constant buffer.
            // Shifted pattern:
            // - samplerHandle = samplerId << 20;
            // - combinedHandle = samplerHandle | textureHandle;
            //   Where samplerId and textureHandle come from a constant buffer.
            // Constant pattern:
            // - combinedHandle = samplerHandleConstant | textureHandle;
            //   Where samplerHandleConstant is a constant and textureHandle comes from a constant buffer.
            if (lhs.AsgOp is Operation lhsOp)
            {
                if (rhs.AsgOp is Operation rhsOp &&
                    lhsOp.Inst == Instruction.BitwiseAnd &&
                    rhsOp.Inst == Instruction.BitwiseAnd)
                {
                    Operand textureSide = GetSourceForMaskedHandle(lhsOp, 0xFFFFF);
                    Operand samplerSide = GetSourceForMaskedHandle(rhsOp, 0xFFF00000);

                    // The operands of the OR may be in the opposite order, so retry swapped.
                    if (textureSide == null || samplerSide == null)
                    {
                        textureSide = GetSourceForMaskedHandle(rhsOp, 0xFFFFF);
                        samplerSide = GetSourceForMaskedHandle(lhsOp, 0xFFF00000);
                    }

                    if (textureSide == null || samplerSide == null)
                    {
                        return;
                    }

                    lhs = textureSide;
                    rhs = samplerSide;
                }
                else if (lhsOp.Inst == Instruction.ShiftLeft)
                {
                    if (IsSamplerIdShift(lhsOp.GetSource(1)))
                    {
                        // The shifted value is the sampler ID, the other side is the texture handle.
                        lhs = rhs;
                        rhs = lhsOp.GetSource(0);
                        handleType = TextureHandleType.SeparateSamplerId;
                    }
                }
            }
            else if (rhs.AsgOp is Operation rhsShiftOp && rhsShiftOp.Inst == Instruction.ShiftLeft)
            {
                if (IsSamplerIdShift(rhsShiftOp.GetSource(1)))
                {
                    rhs = rhsShiftOp.GetSource(0);
                    handleType = TextureHandleType.SeparateSamplerId;
                }
            }
            else if (rhs.Type == OperandType.Constant && (rhs.Value & 0xfffff) == 0)
            {
                handleType = TextureHandleType.SeparateConstantSamplerHandle;
            }

            if (lhs.Type != OperandType.ConstantBuffer)
            {
                return;
            }

            if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
            {
                SetHandle(
                    config,
                    texOp,
                    TextureHandle.PackOffsets(lhs.GetCbufOffset(), ((rhs.Value >> 20) & 0xfff), handleType),
                    TextureHandle.PackSlots(lhs.GetCbufSlot(), 0),
                    rewriteSamplerType,
                    isImage: false);
            }
            else if (rhs.Type == OperandType.ConstantBuffer)
            {
                SetHandle(
                    config,
                    texOp,
                    TextureHandle.PackOffsets(lhs.GetCbufOffset(), rhs.GetCbufOffset(), handleType),
                    TextureHandle.PackSlots(lhs.GetCbufSlot(), rhs.GetCbufSlot()),
                    rewriteSamplerType,
                    isImage: false);
            }
        }

        // Handles ImageLoad/ImageStore/ImageAtomic operations. Only the direct constant
        // buffer handle case is supported for images.
        private static void RewriteImageAccess(BasicBlock block, ShaderConfig config, TextureOperation texOp)
        {
            Operand handle = Utils.FindLastOperation(texOp.GetSource(0), block);

            if (handle.Type != OperandType.ConstantBuffer)
            {
                return;
            }

            int cbufOffset = handle.GetCbufOffset();
            int cbufSlot = handle.GetCbufSlot();

            // When the instruction carries no format, ask the config for it now that the
            // handle location is known.
            if (texOp.Format == TextureFormat.Unknown)
            {
                texOp.Format = texOp.Inst == Instruction.ImageAtomic
                    ? config.GetTextureFormatAtomic(cbufOffset, cbufSlot)
                    : config.GetTextureFormat(cbufOffset, cbufSlot);
            }

            bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;

            SetHandle(config, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
        }

        // True when the shift amount operand is the constant 20, the shift used by the
        // "samplerId << 20" pattern.
        private static bool IsSamplerIdShift(Operand amount)
        {
            return amount.Type == OperandType.Constant && amount.Value == 20;
        }

        // Given a bitwise AND operation (the caller has already checked the instruction),
        // returns the constant buffer operand being masked, or null when the operation is
        // not "cbuf & mask" with the expected mask.
        private static Operand GetSourceForMaskedHandle(Operation asgOp, uint mask)
        {
            Operand a = asgOp.GetSource(0);
            Operand b = asgOp.GetSource(1);

            return (a.Type, b.Type) switch
            {
                // Both operands come from a constant buffer, so the mask value can't be checked.
                // Be optimistic and assume it matches. Constant buffer 1 is avoided since official
                // drivers use that one to store compiler constants.
                (OperandType.ConstantBuffer, OperandType.ConstantBuffer) => a.GetCbufSlot() == 1 ? b : a,
                (OperandType.ConstantBuffer, OperandType.Constant) when (uint)b.Value == mask => a,
                (OperandType.Constant, OperandType.ConstantBuffer) when (uint)a.Value == mask => b,
                _ => null
            };
        }

        // Stores the resolved handle on the operation, optionally fixes up the sampler type
        // using the type queried from the GPU accessor, and registers the texture as used.
        private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType, bool isImage)
        {
            texOp.SetHandle(cbufOffset, cbufSlot);

            if (rewriteSamplerType)
            {
                SamplerType queriedType = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);

                if (texOp.Inst.IsTextureQuery())
                {
                    texOp.Type = queriedType;
                }
                else if (texOp.Type == SamplerType.TextureBuffer && queriedType == SamplerType.Texture1D)
                {
                    // Index at which extra sources are inserted, right after the single 1D coordinate.
                    int insertAt = 1;

                    if (InstEmit.Sample1DAs2D)
                    {
                        // 1D textures are sampled as 2D, so a constant 0 second coordinate is added.
                        queriedType = SamplerType.Texture2D;
                        texOp.InsertSource(insertAt, OperandHelper.Const(0));
                        insertAt++;
                    }

                    bool needsExplicitLod =
                        !isImage &&
                        (texOp.Flags & TextureFlags.IntCoords) != 0 &&
                        (texOp.Flags & TextureFlags.LodLevel) == 0;

                    if (needsExplicitLod)
                    {
                        // IntCoords textures must always have explicit LOD.
                        texOp.SetLodLevelFlag();
                        texOp.InsertSource(insertAt, OperandHelper.Const(0));
                    }

                    texOp.Type = queriedType;
                }
            }

            config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, cbufSlot, cbufOffset);
        }
    }
}

// File: src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    // Optimization pass that turns bindless texture accesses whose handle is loaded from
    // an array in constant buffer 2 into indexed texture accesses.
    static class BindlessToIndexed
    {
        // A bindless texture access can be turned into an indexed access
        // as long as the following conditions are true:
        // - The handle is loaded using a LDC instruction.
        // - The handle is loaded from the constant buffer with the handles (CB2 for NVN).
        // - The load has a constant offset.
        // The base offset of the array of handles on the constant buffer is the constant offset.
        public static void RunPass(BasicBlock block, ShaderConfig config)
        {
            LinkedListNode<INode> node = block.Operations.First;

            for (; node != null; node = node.Next)
            {
                bool isBindlessTexture =
                    node.Value is TextureOperation &&
                    (((TextureOperation)node.Value).Flags & TextureFlags.Bindless) != 0;

                if (!isBindlessTexture)
                {
                    continue;
                }

                TextureOperation texOp = (TextureOperation)node.Value;

                if (!TryMatchHandleArrayLoad(texOp, out Operation addOp, out Operand constantOffset))
                {
                    continue;
                }

                // NOTE(review): the division by 4 presumably converts a byte offset to a
                // word offset; confirm against TextureOperation.TurnIntoIndexed.
                TurnIntoIndexed(config, texOp, constantOffset.Value / 4);

                // The new first source of the texture operation is the non-constant part of
                // the address, shifted right by 3.
                Operand index = Local();

                Operation indexShift = new Operation(Instruction.ShiftRightU32, index, addOp.GetSource(0), Const(3));

                block.Operations.AddBefore(node, indexShift);

                texOp.SetSource(0, index);
            }
        }

        // Checks that the handle (source 0) of the texture operation is produced by
        // LoadConstant(2, ShiftRightU32(Add(x, constant), ...)). On success, returns the
        // Add operation and its constant second source.
        private static bool TryMatchHandleArrayLoad(TextureOperation texOp, out Operation addOp, out Operand constantOffset)
        {
            addOp = null;
            constantOffset = null;

            if (texOp.GetSource(0).AsgOp is not Operation loadOp || loadOp.Inst != Instruction.LoadConstant)
            {
                return false;
            }

            Operand cbufSlot = loadOp.GetSource(0);
            Operand cbufOffset = loadOp.GetSource(1);

            // Only loads from constant buffer 2 are considered.
            if (cbufSlot.Type != OperandType.Constant || cbufSlot.Value != 2)
            {
                return false;
            }

            if (cbufOffset.AsgOp is not Operation shiftOp || shiftOp.Inst != Instruction.ShiftRightU32)
            {
                return false;
            }

            if (shiftOp.GetSource(0).AsgOp is not Operation sumOp || sumOp.Inst != Instruction.Add)
            {
                return false;
            }

            Operand addend = sumOp.GetSource(1);

            if (addend.Type != OperandType.Constant)
            {
                return false;
            }

            addOp = sumOp;
            constantOffset = addend;

            return true;
        }

        // Converts the operation to indexed form and registers the texture as used.
        private static void TurnIntoIndexed(ShaderConfig config, TextureOperation texOp, int handle)
        {
            texOp.TurnIntoIndexed(handle);
            config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, handle);
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    // Optimization pass that drops a block's branch when the following non-empty block
    // starts with a branch to the same destination.
    static class BranchElimination
    {
        // Clears block.Branch when the branch is redundant.
        // Returns true if the block was modified, false otherwise.
        public static bool RunPass(BasicBlock block)
        {
            if (!block.HasBranch)
            {
                return false;
            }

            Operation lastOp = (Operation)block.GetLastOp();
            BasicBlock following = Next(block);

            if (!IsRedundantBranch(lastOp, following))
            {
                return false;
            }

            block.Branch = null;

            return true;
        }

        // The branch at the end of the current block is redundant when:
        // - There is a next block.
        // - The first operation of the next block is a Branch.
        // - Both branches have the same destination.
        // In that case the next block is going to jump to the same place anyway.
        private static bool IsRedundantBranch(Operation current, BasicBlock nextBlock)
        {
            if (nextBlock == null)
            {
                return false;
            }

            return nextBlock.Operations.First?.Value is Operation next &&
                   next.Inst == Instruction.Branch &&
                   current.Dest == next.Dest;
        }

        // Returns the first block after the given one that has at least one operation,
        // or null if there is none. Empty blocks are skipped; an empty block that claims
        // to have a branch is treated as an invalid state.
        private static BasicBlock Next(BasicBlock block)
        {
            BasicBlock candidate = block.Next;

            for (; candidate != null && candidate.Operations.Count == 0; candidate = candidate.Next)
            {
                if (candidate.HasBranch)
                {
                    throw new InvalidOperationException("Found a bogus empty block that \"ends with a branch\".");
                }
            }

            return candidate;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
using Ryujinx.Common.Utilities;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Replaces operations whose sources are all constants with a copy of the
    /// result computed at translation time.
    /// </summary>
    static class ConstantFolding
    {
        /// <summary>
        /// Folds <paramref name="operation"/> into a copy if all of its sources are
        /// constants and its instruction is one of the supported ones.
        /// Operations that do not qualify are left untouched.
        /// </summary>
        /// <param name="operation">Operation to fold</param>
        public static void RunPass(Operation operation)
        {
            if (!AreAllSourcesConstant(operation))
            {
                return;
            }

            switch (operation.Inst)
            {
                case Instruction.Add:
                    EvaluateBinary(operation, (x, y) => x + y);
                    break;

                case Instruction.BitCount:
                    EvaluateUnary(operation, (x) => BitCount(x));
                    break;

                case Instruction.BitwiseAnd:
                    EvaluateBinary(operation, (x, y) => x & y);
                    break;

                case Instruction.BitwiseExclusiveOr:
                    EvaluateBinary(operation, (x, y) => x ^ y);
                    break;

                case Instruction.BitwiseNot:
                    EvaluateUnary(operation, (x) => ~x);
                    break;

                case Instruction.BitwiseOr:
                    EvaluateBinary(operation, (x, y) => x | y);
                    break;

                case Instruction.BitfieldExtractS32:
                    BitfieldExtractS32(operation);
                    break;

                case Instruction.BitfieldExtractU32:
                    BitfieldExtractU32(operation);
                    break;

                case Instruction.Clamp:
                    EvaluateTernary(operation, (x, y, z) => SafeClamp(x, y, z));
                    break;

                case Instruction.ClampU32:
                    EvaluateTernary(operation, (x, y, z) => (int)SafeClamp((uint)x, (uint)y, (uint)z));
                    break;

                case Instruction.CompareEqual:
                    EvaluateBinary(operation, (x, y) => x == y);
                    break;

                case Instruction.CompareGreater:
                    EvaluateBinary(operation, (x, y) => x > y);
                    break;

                case Instruction.CompareGreaterOrEqual:
                    EvaluateBinary(operation, (x, y) => x >= y);
                    break;

                case Instruction.CompareGreaterOrEqualU32:
                    EvaluateBinary(operation, (x, y) => (uint)x >= (uint)y);
                    break;

                case Instruction.CompareGreaterU32:
                    EvaluateBinary(operation, (x, y) => (uint)x > (uint)y);
                    break;

                case Instruction.CompareLess:
                    EvaluateBinary(operation, (x, y) => x < y);
                    break;

                case Instruction.CompareLessOrEqual:
                    EvaluateBinary(operation, (x, y) => x <= y);
                    break;

                case Instruction.CompareLessOrEqualU32:
                    EvaluateBinary(operation, (x, y) => (uint)x <= (uint)y);
                    break;

                case Instruction.CompareLessU32:
                    EvaluateBinary(operation, (x, y) => (uint)x < (uint)y);
                    break;

                case Instruction.CompareNotEqual:
                    EvaluateBinary(operation, (x, y) => x != y);
                    break;

                case Instruction.Divide:
                    EvaluateBinary(operation, (x, y) => DivideS32(x, y));
                    break;

                case Instruction.FP32 | Instruction.Add:
                    EvaluateFPBinary(operation, (x, y) => x + y);
                    break;

                case Instruction.FP32 | Instruction.Clamp:
                    EvaluateFPTernary(operation, (x, y, z) => SafeClamp(x, y, z));
                    break;

                case Instruction.FP32 | Instruction.CompareEqual:
                    EvaluateFPBinary(operation, (x, y) => x == y);
                    break;

                case Instruction.FP32 | Instruction.CompareGreater:
                    EvaluateFPBinary(operation, (x, y) => x > y);
                    break;

                case Instruction.FP32 | Instruction.CompareGreaterOrEqual:
                    EvaluateFPBinary(operation, (x, y) => x >= y);
                    break;

                case Instruction.FP32 | Instruction.CompareLess:
                    EvaluateFPBinary(operation, (x, y) => x < y);
                    break;

                case Instruction.FP32 | Instruction.CompareLessOrEqual:
                    EvaluateFPBinary(operation, (x, y) => x <= y);
                    break;

                case Instruction.FP32 | Instruction.CompareNotEqual:
                    EvaluateFPBinary(operation, (x, y) => x != y);
                    break;

                case Instruction.FP32 | Instruction.Divide:
                    EvaluateFPBinary(operation, (x, y) => x / y);
                    break;

                case Instruction.FP32 | Instruction.Multiply:
                    EvaluateFPBinary(operation, (x, y) => x * y);
                    break;

                case Instruction.FP32 | Instruction.Negate:
                    EvaluateFPUnary(operation, (x) => -x);
                    break;

                case Instruction.FP32 | Instruction.Subtract:
                    EvaluateFPBinary(operation, (x, y) => x - y);
                    break;

                case Instruction.IsNan:
                    EvaluateFPUnary(operation, (x) => float.IsNaN(x));
                    break;

                case Instruction.LoadConstant:
                    // Constant slot and offset: the load becomes a direct constant buffer operand.
                    operation.TurnIntoCopy(Cbuf(operation.GetSource(0).Value, operation.GetSource(1).Value));
                    break;

                case Instruction.Maximum:
                    EvaluateBinary(operation, (x, y) => Math.Max(x, y));
                    break;

                case Instruction.MaximumU32:
                    EvaluateBinary(operation, (x, y) => (int)Math.Max((uint)x, (uint)y));
                    break;

                case Instruction.Minimum:
                    EvaluateBinary(operation, (x, y) => Math.Min(x, y));
                    break;

                case Instruction.MinimumU32:
                    EvaluateBinary(operation, (x, y) => (int)Math.Min((uint)x, (uint)y));
                    break;

                case Instruction.Multiply:
                    EvaluateBinary(operation, (x, y) => x * y);
                    break;

                case Instruction.Negate:
                    EvaluateUnary(operation, (x) => -x);
                    break;

                // NOTE(review): C# only uses the low 5 bits of the shift count for 32-bit
                // operands; confirm that this matches the semantics expected for the IR shifts.
                case Instruction.ShiftLeft:
                    EvaluateBinary(operation, (x, y) => x << y);
                    break;

                case Instruction.ShiftRightS32:
                    EvaluateBinary(operation, (x, y) => x >> y);
                    break;

                case Instruction.ShiftRightU32:
                    EvaluateBinary(operation, (x, y) => (int)((uint)x >> y));
                    break;

                case Instruction.Subtract:
                    EvaluateBinary(operation, (x, y) => x - y);
                    break;

                case Instruction.UnpackHalf2x16:
                    UnpackHalf2x16(operation);
                    break;
            }
        }

        /// <summary>
        /// Checks that every source of the operation is a constant operand.
        /// </summary>
        private static bool AreAllSourcesConstant(Operation operation)
        {
            for (int index = 0; index < operation.SourcesCount; index++)
            {
                if (operation.GetSource(index).Type != OperandType.Constant)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Counts the bits set on a 32-bit value.
        /// </summary>
        private static int BitCount(int value)
        {
            int count = 0;

            for (int bit = 0; bit < 32; bit++)
            {
                if (value.Extract(bit))
                {
                    count++;
                }
            }

            return count;
        }

        /// <summary>
        /// Signed 32-bit division that never throws.
        /// A zero divisor yields 0, and dividing by -1 wraps around instead of
        /// throwing <see cref="OverflowException"/> for <see cref="int.MinValue"/>.
        /// </summary>
        private static int DivideS32(int dividend, int divisor)
        {
            if (divisor == 0)
            {
                return 0;
            }

            if (divisor == -1)
            {
                // "int.MinValue / -1" throws on C#, negate with wrap around instead.
                return unchecked(-dividend);
            }

            return dividend / divisor;
        }

        /// <summary>
        /// Clamp that does not throw when <paramref name="min"/> is greater than <paramref name="max"/>.
        /// In that case, min(max(value, min), max) is returned instead.
        /// </summary>
        private static int SafeClamp(int value, int min, int max)
        {
            // Math.Clamp throws ArgumentException if min > max.
            return min > max ? Math.Min(Math.Max(value, min), max) : Math.Clamp(value, min, max);
        }

        /// <summary>
        /// Unsigned variant of <see cref="SafeClamp(int, int, int)"/>.
        /// </summary>
        private static uint SafeClamp(uint value, uint min, uint max)
        {
            return min > max ? Math.Min(Math.Max(value, min), max) : Math.Clamp(value, min, max);
        }

        /// <summary>
        /// Floating-point variant of <see cref="SafeClamp(int, int, int)"/>.
        /// Inputs where Math.Clamp does not throw (including NaN bounds) produce the Math.Clamp result.
        /// </summary>
        private static float SafeClamp(float value, float min, float max)
        {
            return min > max ? Math.Min(Math.Max(value, min), max) : Math.Clamp(value, min, max);
        }

        /// <summary>
        /// Folds a signed bitfield extract: extracts the field and sign extends it
        /// from its length (third source) to 32 bits.
        /// </summary>
        private static void BitfieldExtractS32(Operation operation)
        {
            int value = GetBitfieldExtractValue(operation);

            int shift = 32 - operation.GetSource(2).Value;

            // Move the field to the top of the word and back to replicate its sign bit.
            value = (value << shift) >> shift;

            operation.TurnIntoCopy(Const(value));
        }

        /// <summary>
        /// Folds an unsigned bitfield extract.
        /// </summary>
        private static void BitfieldExtractU32(Operation operation)
        {
            operation.TurnIntoCopy(Const(GetBitfieldExtractValue(operation)));
        }

        /// <summary>
        /// Extracts the field described by the sources (value, lsb, length) of the operation.
        /// </summary>
        private static int GetBitfieldExtractValue(Operation operation)
        {
            int value = operation.GetSource(0).Value;
            int lsb = operation.GetSource(1).Value;
            int length = operation.GetSource(2).Value;

            return value.Extract(lsb, length);
        }

        /// <summary>
        /// Folds a half unpack: selects the 16-bit half given by the operation index
        /// and converts it to a 32-bit float constant.
        /// </summary>
        private static void UnpackHalf2x16(Operation operation)
        {
            int value = operation.GetSource(0).Value;

            value = (value >> operation.Index * 16) & 0xffff;

            operation.TurnIntoCopy(ConstF((float)BitConverter.UInt16BitsToHalf((ushort)value)));
        }

        private static void EvaluateUnary(Operation operation, Func<int, int> op)
        {
            int x = operation.GetSource(0).Value;

            operation.TurnIntoCopy(Const(op(x)));
        }

        private static void EvaluateFPUnary(Operation operation, Func<float, float> op)
        {
            float x = operation.GetSource(0).AsFloat();

            operation.TurnIntoCopy(ConstF(op(x)));
        }

        // Boolean results are materialized as the IR true/false constants.
        private static void EvaluateFPUnary(Operation operation, Func<float, bool> op)
        {
            float x = operation.GetSource(0).AsFloat();

            operation.TurnIntoCopy(Const(op(x) ? IrConsts.True : IrConsts.False));
        }

        private static void EvaluateBinary(Operation operation, Func<int, int, int> op)
        {
            int x = operation.GetSource(0).Value;
            int y = operation.GetSource(1).Value;

            operation.TurnIntoCopy(Const(op(x, y)));
        }

        private static void EvaluateBinary(Operation operation, Func<int, int, bool> op)
        {
            int x = operation.GetSource(0).Value;
            int y = operation.GetSource(1).Value;

            operation.TurnIntoCopy(Const(op(x, y) ? IrConsts.True : IrConsts.False));
        }

        private static void EvaluateFPBinary(Operation operation, Func<float, float, float> op)
        {
            float x = operation.GetSource(0).AsFloat();
            float y = operation.GetSource(1).AsFloat();

            operation.TurnIntoCopy(ConstF(op(x, y)));
        }

        private static void EvaluateFPBinary(Operation operation, Func<float, float, bool> op)
        {
            float x = operation.GetSource(0).AsFloat();
            float y = operation.GetSource(1).AsFloat();

            operation.TurnIntoCopy(Const(op(x, y) ? IrConsts.True : IrConsts.False));
        }

        private static void EvaluateTernary(Operation operation, Func<int, int, int, int> op)
        {
            int x = operation.GetSource(0).Value;
            int y = operation.GetSource(1).Value;
            int z = operation.GetSource(2).Value;

            operation.TurnIntoCopy(Const(op(x, y, z)));
        }

        private static void EvaluateFPTernary(Operation operation, Func<float, float, float, float> op)
        {
            float x = operation.GetSource(0).AsFloat();
            float y = operation.GetSource(1).AsFloat();
            float z = operation.GetSource(2).AsFloat();

            operation.TurnIntoCopy(ConstF(op(x, y, z)));
        }
    }
}
// src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Replaces global memory accesses with storage buffer (or constant buffer) accesses
    /// when the base address of the access can be traced back to a buffer descriptor
    /// stored on constant buffer 0.
    /// </summary>
    static class GlobalToStorage
    {
        /// <summary>
        /// Runs the pass on a single block.
        /// </summary>
        /// <param name="block">Block to process</param>
        /// <param name="config">Shader configuration</param>
        /// <param name="sbUseMask">Accumulated mask of storage descriptors referenced by any source operand</param>
        /// <param name="ubeUseMask">Accumulated mask of "UBE" descriptors referenced by any source operand (compute only)</param>
        public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
        {
            int sbStart = GetStorageBaseCbOffset(config.Stage);
            int sbEnd = sbStart + StorageDescsSize;

            int ubeStart = UbeBaseOffset;
            int ubeEnd = UbeBaseOffset + UbeDescsSize;

            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
            {
                // Record every descriptor that is read directly as a source operand.
                for (int index = 0; index < node.Value.SourcesCount; index++)
                {
                    Operand src = node.Value.GetSource(index);

                    int storageIndex = GetStorageIndex(src, sbStart, sbEnd);

                    if (storageIndex >= 0)
                    {
                        sbUseMask |= 1 << storageIndex;
                    }

                    if (config.Stage == ShaderStage.Compute)
                    {
                        int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd);

                        if (constantIndex >= 0)
                        {
                            ubeUseMask |= 1 << constantIndex;
                        }
                    }
                }

                if (!(node.Value is Operation operation))
                {
                    continue;
                }

                if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
                {
                    Operand source = operation.GetSource(0);

                    int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);

                    if (storageIndex >= 0)
                    {
                        // Storage buffers are implemented using global memory access.
                        // If we know from where the base address of the access is loaded,
                        // we can guess which storage buffer it is accessing.
                        // We can then replace the global memory access with a storage
                        // buffer access.
                        node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
                    }
                    else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
                    {
                        // Here we effectively try to replace a LDG instruction with LDC.
                        // The hardware only supports a limited amount of constant buffers
                        // so NVN "emulates" more constant buffers using global memory access.
                        // Here we try to replace the global access back to a constant buffer
                        // load.
                        // "ubeEnd" already includes the base offset, so it is the exclusive
                        // end of the descriptor range (same range used for the use mask above).
                        storageIndex = SearchForStorageBase(block, source, ubeStart, ubeEnd);

                        if (storageIndex >= 0)
                        {
                            node = ReplaceLdgWithLdc(node, config, storageIndex);
                        }
                    }
                }
            }

            config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
        }

        /// <summary>
        /// Replaces the global memory operation on <paramref name="node"/> with the
        /// equivalent storage buffer operation.
        /// </summary>
        /// <returns>The node of the new operation, which takes the place of the old one on the list</returns>
        private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
        {
            Operation operation = (Operation)node.Value;

            bool isAtomic = operation.Inst.IsAtomic();
            bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;

            config.SetUsedStorageBuffer(storageIndex, isWrite);

            Operand[] sources = new Operand[operation.SourcesCount];

            // The first two sources become the buffer index and the offset into the buffer,
            // the remaining ones are carried over unchanged.
            sources[0] = Const(storageIndex);
            sources[1] = GetStorageOffset(block, node, config, storageIndex, operation.GetSource(0), isStg16Or8);

            for (int index = 2; index < operation.SourcesCount; index++)
            {
                sources[index] = operation.GetSource(index);
            }

            Operation storageOp;

            if (isAtomic)
            {
                storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
            }
            else if (operation.Inst == Instruction.LoadGlobal)
            {
                storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
            }
            else
            {
                Instruction storeInst = operation.Inst switch
                {
                    Instruction.StoreGlobal16 => Instruction.StoreStorage16,
                    Instruction.StoreGlobal8 => Instruction.StoreStorage8,
                    _ => Instruction.StoreStorage
                };

                storageOp = new Operation(storeInst, null, sources);
            }

            // Detach the old operation from its sources before removing it.
            for (int index = 0; index < operation.SourcesCount; index++)
            {
                operation.SetSource(index, null);
            }

            LinkedListNode<INode> oldNode = node;

            node = node.List.AddBefore(node, storageOp);

            node.List.Remove(oldNode);

            return node;
        }

        /// <summary>
        /// Emits, before <paramref name="node"/>, the code that computes the offset of the
        /// access inside the storage buffer, and returns the operand holding it.
        /// The offset is in bytes for 16/8-bit stores, and in 32-bit words otherwise.
        /// </summary>
        private static Operand GetStorageOffset(
            BasicBlock block,
            LinkedListNode<INode> node,
            ShaderConfig config,
            int storageIndex,
            Operand addrLow,
            bool isStg16Or8)
        {
            int baseAddressCbOffset = GetStorageCbOffset(config.Stage, storageIndex);

            bool storageAligned = !(config.GpuAccessor.QueryHasUnalignedStorageBuffer() || config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() > Constants.StorageAlignment);

            // Only when the storage is aligned we can try to reuse the offset that the
            // shader itself adds to the base address.
            (Operand byteOffset, int constantOffset) = storageAligned ?
                GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), baseAddressCbOffset) :
                (null, 0);

            if (byteOffset != null)
            {
                ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset);
            }

            if (byteOffset == null)
            {
                // Fallback: offset = address - (base address & -alignment).
                Operand baseAddrLow = Cbuf(0, baseAddressCbOffset);
                Operand baseAddrTrunc = Local();

                Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

                Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);

                node.List.AddBefore(node, andOp);

                Operand offset = Local();
                Operation subOp = new Operation(Instruction.Subtract, offset, addrLow, baseAddrTrunc);

                node.List.AddBefore(node, subOp);

                byteOffset = offset;
            }
            else if (constantOffset != 0)
            {
                Operand offset = Local();
                Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));

                node.List.AddBefore(node, addOp);

                byteOffset = offset;
            }

            if (isStg16Or8)
            {
                return byteOffset;
            }

            Operand wordOffset = Local();
            Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));

            node.List.AddBefore(node, shrOp);

            return wordOffset;
        }

        /// <summary>
        /// Checks if the operand is the constant buffer 0 operand at the given offset.
        /// </summary>
        private static bool IsCb0Offset(Operand operand, int offset)
        {
            return operand.Type == OperandType.ConstantBuffer && operand.GetCbufSlot() == 0 && operand.GetCbufOffset() == offset;
        }

        /// <summary>
        /// Rewrites every "address &amp; 3" on <paramref name="list"/> to use
        /// "byteOffset + constantOffset" instead of the full address.
        /// </summary>
        private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
        {
            // When we emit 16/8-bit LDG, we add extra code to determine the address alignment.
            // Eliminate the storage buffer base address from this too, leaving only the byte offset.

            // Enumerate a snapshot of the uses: SetSource below replaces "address" on the user,
            // which is expected to update the use list being enumerated (TODO confirm).
            foreach (INode useNode in new List<INode>(address.UseOps))
            {
                if (useNode is Operation op && op.Inst == Instruction.BitwiseAnd)
                {
                    Operand src1 = op.GetSource(0);
                    Operand src2 = op.GetSource(1);

                    int addressIndex = -1;

                    if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3)
                    {
                        addressIndex = 0;
                    }
                    else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3)
                    {
                        addressIndex = 1;
                    }

                    if (addressIndex != -1)
                    {
                        LinkedListNode<INode> node = list.Find(op);

                        // Add offset calculation before the use. Needs to be on the same block.
                        if (node != null)
                        {
                            Operand offset = Local();
                            Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));
                            list.AddBefore(node, addOp);

                            op.SetSource(addressIndex, offset);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Tries to split <paramref name="address"/> into "base + byteOffset + constantOffset",
        /// where base is the constant buffer 0 operand at <paramref name="baseAddressCbOffset"/>.
        /// </summary>
        /// <returns>The byte offset operand and constant offset, or (null, 0) if the pattern was not found</returns>
        private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int baseAddressCbOffset)
        {
            if (IsCb0Offset(address, baseAddressCbOffset))
            {
                // Direct offset: zero.
                return (Const(0), 0);
            }

            (address, int constantOffset) = GetStorageConstantOffset(block, address);

            address = Utils.FindLastOperation(address, block);

            if (IsCb0Offset(address, baseAddressCbOffset))
            {
                // Only constant offset
                return (Const(0), constantOffset);
            }

            if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add)
            {
                return (null, 0);
            }

            Operand src1 = offsetAdd.GetSource(0);
            Operand src2 = Utils.FindLastOperation(offsetAdd.GetSource(1), block);

            // The base address can be on either side of the add, the other side is the offset.
            if (IsCb0Offset(src2, baseAddressCbOffset))
            {
                return (src1, constantOffset);
            }
            else if (IsCb0Offset(src1, baseAddressCbOffset))
            {
                return (src2, constantOffset);
            }

            return (null, 0);
        }

        /// <summary>
        /// If <paramref name="address"/> is assigned by an add with a constant second source,
        /// returns the first source and the constant. Otherwise returns the address and 0.
        /// </summary>
        private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
        {
            if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add)
            {
                return (address, 0);
            }

            Operand src1 = offsetAdd.GetSource(0);
            Operand src2 = offsetAdd.GetSource(1);

            if (src2.Type != OperandType.Constant)
            {
                return (address, 0);
            }

            return (src1, src2.Value);
        }

        /// <summary>
        /// Replaces the global load on <paramref name="node"/> with a constant buffer load.
        /// </summary>
        /// <returns>The node of the new operation, which takes the place of the old one on the list</returns>
        private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
        {
            Operation operation = (Operation)node.Value;

            // Emits: wordOffset = (address - (base address & -alignment)) >> 2.
            Operand GetCbufOffset()
            {
                Operand addrLow = operation.GetSource(0);

                Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize);

                Operand baseAddrTrunc = Local();

                Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

                Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);

                node.List.AddBefore(node, andOp);

                Operand byteOffset = Local();
                Operand wordOffset = Local();

                Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
                Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));

                node.List.AddBefore(node, subOp);
                node.List.AddBefore(node, shrOp);

                return wordOffset;
            }

            Operand[] sources = new Operand[operation.SourcesCount];

            int cbSlot = UbeFirstCbuf + storageIndex;

            sources[0] = Const(cbSlot);
            sources[1] = GetCbufOffset();

            config.SetUsedConstantBuffer(cbSlot);

            for (int index = 2; index < operation.SourcesCount; index++)
            {
                sources[index] = operation.GetSource(index);
            }

            Operation ldcOp = new Operation(Instruction.LoadConstant, operation.Dest, sources);

            // Detach the old operation from its sources before removing it.
            for (int index = 0; index < operation.SourcesCount; index++)
            {
                operation.SetSource(index, null);
            }

            LinkedListNode<INode> oldNode = node;

            node = node.List.AddBefore(node, ldcOp);

            node.List.Remove(oldNode);

            return node;
        }

        /// <summary>
        /// Tries to find which descriptor, on the constant buffer 0 range
        /// [<paramref name="sbStart"/>, <paramref name="sbEnd"/>), the address was loaded from.
        /// </summary>
        /// <returns>The descriptor index, or -1 if not found</returns>
        private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd)
        {
            globalAddress = Utils.FindLastOperation(globalAddress, block);

            if (globalAddress.Type == OperandType.ConstantBuffer)
            {
                return GetStorageIndex(globalAddress, sbStart, sbEnd);
            }

            Operation operation = globalAddress.AsgOp as Operation;

            if (operation == null || operation.Inst != Instruction.Add)
            {
                return -1;
            }

            Operand src1 = operation.GetSource(0);
            Operand src2 = operation.GetSource(1);

            // For "local + constant", look through to the add that assigns the local.
            if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) ||
                (src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant))
            {
                if (src1.Type == OperandType.LocalVariable)
                {
                    operation = Utils.FindLastOperation(src1, block).AsgOp as Operation;
                }
                else
                {
                    operation = Utils.FindLastOperation(src2, block).AsgOp as Operation;
                }

                if (operation == null || operation.Inst != Instruction.Add)
                {
                    return -1;
                }
            }

            for (int index = 0; index < operation.SourcesCount; index++)
            {
                Operand source = operation.GetSource(index);

                int storageIndex = GetStorageIndex(source, sbStart, sbEnd);

                if (storageIndex != -1)
                {
                    return storageIndex;
                }
            }

            return -1;
        }

        /// <summary>
        /// Gets the index of the descriptor that the operand reads, if it is a constant buffer 0
        /// operand with offset on the range [<paramref name="sbStart"/>, <paramref name="sbEnd"/>).
        /// </summary>
        /// <returns>The descriptor index, or -1 if the operand does not read a descriptor</returns>
        private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
        {
            if (operand.Type == OperandType.ConstantBuffer)
            {
                int slot = operand.GetCbufSlot();
                int offset = operand.GetCbufOffset();

                if (slot == 0 && offset >= sbStart && offset < sbEnd)
                {
                    int storageIndex = (offset - sbStart) / StorageDescSize;

                    return storageIndex;
                }
            }

            return -1;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Entry point of the IR optimizations. Runs the individual passes over the blocks.
    /// </summary>
    static class Optimizer
    {
        /// <summary>
        /// Runs all optimizations on the specified blocks.
        /// </summary>
        /// <param name="blocks">Blocks to optimize</param>
        /// <param name="config">Shader configuration</param>
        public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
        {
            RunOptimizationPasses(blocks);

            int sbUseMask = 0;
            int ubeUseMask = 0;

            // Those passes are looking for specific patterns and only needs to run once.
            for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
            {
                GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask);
                BindlessToIndexed.RunPass(blocks[blkIndex], config);
                BindlessElimination.RunPass(blocks[blkIndex], config);
            }

            config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);

            // Run optimizations one last time to remove any code that is now optimizable after above passes.
            RunOptimizationPasses(blocks);
        }

        /// <summary>
        /// Repeats unused code removal, constant folding, simplification, copy/pack/phi
        /// propagation and branch elimination until no block is modified anymore.
        /// </summary>
        private static void RunOptimizationPasses(BasicBlock[] blocks)
        {
            bool modified;

            do
            {
                modified = false;

                for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
                {
                    BasicBlock block = blocks[blkIndex];

                    LinkedListNode<INode> node = block.Operations.First;

                    while (node != null)
                    {
                        // Grab the next node first, the current one might be removed below.
                        LinkedListNode<INode> nextNode = node.Next;

                        bool isUnused = IsUnused(node.Value);

                        if (!(node.Value is Operation operation) || isUnused)
                        {
                            if (node.Value is PhiNode phi && !isUnused)
                            {
                                isUnused = PropagatePhi(phi);
                            }

                            if (isUnused)
                            {
                                RemoveNode(block, node);

                                modified = true;
                            }

                            node = nextNode;

                            continue;
                        }

                        ConstantFolding.RunPass(operation);

                        Simplification.RunPass(operation);

                        if (DestIsLocalVar(operation))
                        {
                            if (operation.Inst == Instruction.Copy)
                            {
                                PropagateCopy(operation);

                                RemoveNode(block, node);

                                modified = true;
                            }
                            else if ((operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation)) ||
                                     (operation.Inst == Instruction.ShuffleXor && MatchDdxOrDdy(operation)))
                            {
                                if (DestHasNoUses(operation))
                                {
                                    RemoveNode(block, node);
                                }

                                modified = true;
                            }
                        }

                        node = nextNode;
                    }

                    if (BranchElimination.RunPass(block))
                    {
                        // The branch was cleared from the block, now remove the branch operation itself.
                        RemoveNode(block, block.Operations.Last);

                        modified = true;
                    }
                }
            }
            while (modified);
        }

        private static void PropagateCopy(Operation copyOp)
        {
            // Propagate copy source operand to all uses of
            // the destination operand.

            Operand dest = copyOp.Dest;
            Operand src = copyOp.GetSource(0);

            // Work on a snapshot of the uses, as sources are replaced while iterating.
            INode[] uses = dest.UseOps.ToArray();

            foreach (INode useNode in uses)
            {
                for (int index = 0; index < useNode.SourcesCount; index++)
                {
                    if (useNode.GetSource(index) == dest)
                    {
                        useNode.SetSource(index, src);
                    }
                }
            }
        }

        /// <summary>
        /// Replaces all uses of the phi destination with its source, if all sources are the same.
        /// </summary>
        /// <returns>True if the value was propagated and the phi can be removed, false otherwise</returns>
        private static bool PropagatePhi(PhiNode phi)
        {
            // If all phi sources are the same, we can propagate it and remove the phi.

            Operand firstSrc = phi.GetSource(0);

            for (int index = 1; index < phi.SourcesCount; index++)
            {
                if (!IsSameOperand(firstSrc, phi.GetSource(index)))
                {
                    return false;
                }
            }

            // All sources are equal, we can propagate the value.

            Operand dest = phi.Dest;

            // Work on a snapshot of the uses, as sources are replaced while iterating.
            INode[] uses = dest.UseOps.ToArray();

            foreach (INode useNode in uses)
            {
                for (int index = 0; index < useNode.SourcesCount; index++)
                {
                    if (useNode.GetSource(index) == dest)
                    {
                        useNode.SetSource(index, firstSrc);
                    }
                }
            }

            return true;
        }

        /// <summary>
        /// Checks if two operands are constants or constant buffer operands with the same type and value.
        /// </summary>
        private static bool IsSameOperand(Operand x, Operand y)
        {
            if (x.Type != y.Type || x.Value != y.Value)
            {
                return false;
            }

            // TODO: Handle Load operations with the same storage and the same constant parameters.
            return x.Type == OperandType.Constant || x.Type == OperandType.ConstantBuffer;
        }

        /// <summary>
        /// Turns the unpack operations that use the pack result into copies of the pack sources.
        /// </summary>
        /// <returns>True if any use was modified, false otherwise</returns>
        private static bool PropagatePack(Operation packOp)
        {
            // Propagate pack source operands to uses by unpack
            // instruction. The source depends on the unpack instruction.
            bool modified = false;

            Operand dest = packOp.Dest;
            Operand src0 = packOp.GetSource(0);
            Operand src1 = packOp.GetSource(1);

            INode[] uses = dest.UseOps.ToArray();

            foreach (INode useNode in uses)
            {
                if (!(useNode is Operation operation) || operation.Inst != Instruction.UnpackHalf2x16)
                {
                    continue;
                }

                if (operation.GetSource(0) == dest)
                {
                    operation.TurnIntoCopy(operation.Index == 1 ? src1 : src0);

                    modified = true;
                }
            }

            return modified;
        }

        /// <summary>
        /// Turns the swizzle add operations that use the result of the shuffle
        /// <paramref name="operation"/> into Ddx or Ddy, when they match the expected pattern.
        /// </summary>
        /// <param name="operation">Shuffle xor operation</param>
        /// <returns>True if any use was modified, false otherwise</returns>
        public static bool MatchDdxOrDdy(Operation operation)
        {
            // It's assumed that "operation.Inst" is ShuffleXor,
            // that should be checked before calling this method.
            Debug.Assert(operation.Inst == Instruction.ShuffleXor);

            bool modified = false;

            Operand src2 = operation.GetSource(1);
            Operand src3 = operation.GetSource(2);

            if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
            {
                return false;
            }

            if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
            {
                return false;
            }

            bool isDdy = src2.Value == 2;
            bool isDdx = !isDdy;

            // We can replace any use by a FSWZADD with DDX/DDY, when
            // the following conditions are true:
            // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
            // - The first source operand must be the shuffle output.
            // - The second source operand must be the shuffle first source operand.
            INode[] uses = operation.Dest.UseOps.ToArray();

            foreach (INode use in uses)
            {
                if (!(use is Operation useOp) || useOp.Inst != Instruction.SwizzleAdd)
                {
                    continue;
                }

                Operand fswzaddSrc1 = useOp.GetSource(0);
                Operand fswzaddSrc2 = useOp.GetSource(1);
                Operand fswzaddSrc3 = useOp.GetSource(2);

                if (fswzaddSrc1 != operation.Dest)
                {
                    continue;
                }

                if (fswzaddSrc2 != operation.GetSource(0))
                {
                    continue;
                }

                if (fswzaddSrc3.Type != OperandType.Constant)
                {
                    continue;
                }

                int mask = fswzaddSrc3.Value;

                if ((isDdx && mask != 0b10011001) ||
                    (isDdy && mask != 0b10100101))
                {
                    continue;
                }

                useOp.TurnInto(isDdx ? Instruction.Ddx : Instruction.Ddy, fswzaddSrc2);

                modified = true;
            }

            return modified;
        }

        private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
        {
            // Remove a node from the nodes list, and also remove itself
            // from all the use lists on the operands that this node uses.
            block.Operations.Remove(llNode);

            Queue<INode> nodes = new Queue<INode>();

            nodes.Enqueue(llNode.Value);

            while (nodes.TryDequeue(out INode node))
            {
                for (int index = 0; index < node.SourcesCount; index++)
                {
                    Operand src = node.GetSource(index);

                    if (src.Type != OperandType.LocalVariable)
                    {
                        continue;
                    }

                    // When the last use of a local is gone, the node that assigns it is processed too.
                    // Only its uses are dropped here, it is not removed from the block by this method.
                    if (src.UseOps.Remove(node) && src.UseOps.Count == 0)
                    {
                        Debug.Assert(src.AsgOp != null);
                        nodes.Enqueue(src.AsgOp);
                    }
                }
            }
        }

        /// <summary>
        /// Checks if the node has no side effects and only writes to local variables without uses.
        /// </summary>
        private static bool IsUnused(INode node)
        {
            return !HasSideEffects(node) && DestIsLocalVar(node) && DestHasNoUses(node);
        }

        /// <summary>
        /// Checks if the node is an atomic, image atomic or call operation,
        /// which must be kept even if the result is unused.
        /// </summary>
        private static bool HasSideEffects(INode node)
        {
            if (node is Operation operation)
            {
                switch (operation.Inst & Instruction.Mask)
                {
                    case Instruction.AtomicAdd:
                    case Instruction.AtomicAnd:
                    case Instruction.AtomicCompareAndSwap:
                    case Instruction.AtomicMaxS32:
                    case Instruction.AtomicMaxU32:
                    case Instruction.AtomicMinS32:
                    case Instruction.AtomicMinU32:
                    case Instruction.AtomicOr:
                    case Instruction.AtomicSwap:
                    case Instruction.AtomicXor:
                    case Instruction.Call:
                    case Instruction.ImageAtomic:
                        return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Checks if the node has at least one destination slot, and all
        /// non-null destinations are local variables.
        /// </summary>
        private static bool DestIsLocalVar(INode node)
        {
            if (node.DestsCount == 0)
            {
                return false;
            }

            for (int index = 0; index < node.DestsCount; index++)
            {
                Operand dest = node.GetDest(index);

                if (dest != null && dest.Type != OperandType.LocalVariable)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Checks if none of the non-null destinations of the node are used.
        /// </summary>
        private static bool DestHasNoUses(INode node)
        {
            for (int index = 0; index < node.DestsCount; index++)
            {
                Operand dest = node.GetDest(index);

                if (dest != null && dest.UseOps.Count != 0)
                {
                    return false;
                }
            }

            return true;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Turns operations with a trivial result (such as "x + 0" or "x &amp; 0") into copies.
    /// </summary>
    static class Simplification
    {
        private const int AllOnes = ~0;

        /// <summary>
        /// Simplifies <paramref name="operation"/> if it matches one of the known patterns.
        /// Operations that do not match are left untouched.
        /// </summary>
        /// <param name="operation">Operation to simplify</param>
        public static void RunPass(Operation operation)
        {
            switch (operation.Inst)
            {
                // x + 0 == x, 0 + y == y, x ^ 0 == x, 0 ^ y == y
                case Instruction.Add:
                case Instruction.BitwiseExclusiveOr:
                    TryEliminateBinaryOpCommutative(operation, 0);
                    break;

                // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
                // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
                case Instruction.BitwiseAnd:
                    TryEliminateBitwise(operation, AllOnes, 0);
                    break;

                // x | 0x00000000 == x, 0x00000000 | y == y,
                // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
                case Instruction.BitwiseOr:
                    TryEliminateBitwise(operation, 0, AllOnes);
                    break;

                case Instruction.ConditionalSelect:
                    TryEliminateConditionalSelect(operation);
                    break;

                // x / 1 == x
                case Instruction.Divide:
                    TryEliminateBinaryOpY(operation, 1);
                    break;

                // x * 1 == x, 1 * y == y
                case Instruction.Multiply:
                    TryEliminateBinaryOpCommutative(operation, 1);
                    break;

                // x << 0 == x, x >> 0 == x, x - 0 == x
                case Instruction.ShiftLeft:
                case Instruction.ShiftRightS32:
                case Instruction.ShiftRightU32:
                case Instruction.Subtract:
                    TryEliminateBinaryOpY(operation, 0);
                    break;
            }
        }

        /// <summary>
        /// Simplifies a bitwise AND/OR. A source equal to <paramref name="identity"/> makes
        /// the result a copy of the other source (first source is checked first), otherwise a
        /// source equal to <paramref name="absorbing"/> makes the result that constant.
        /// </summary>
        private static void TryEliminateBitwise(Operation operation, int identity, int absorbing)
        {
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            if (IsConstEqual(lhs, identity))
            {
                operation.TurnIntoCopy(rhs);

                return;
            }

            if (IsConstEqual(rhs, identity))
            {
                operation.TurnIntoCopy(lhs);

                return;
            }

            if (IsConstEqual(lhs, absorbing) || IsConstEqual(rhs, absorbing))
            {
                operation.TurnIntoCopy(Const(absorbing));
            }
        }

        /// <summary>
        /// Turns the operation into a copy of the first source, if the second
        /// source is a constant equal to <paramref name="comparand"/>.
        /// </summary>
        private static void TryEliminateBinaryOpY(Operation operation, int comparand)
        {
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            if (!IsConstEqual(rhs, comparand))
            {
                return;
            }

            operation.TurnIntoCopy(lhs);
        }

        /// <summary>
        /// Turns the operation into a copy of one source, if the other source is a
        /// constant equal to <paramref name="comparand"/>. The first source is checked first.
        /// </summary>
        private static void TryEliminateBinaryOpCommutative(Operation operation, int comparand)
        {
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            if (IsConstEqual(lhs, comparand))
            {
                operation.TurnIntoCopy(rhs);

                return;
            }

            if (IsConstEqual(rhs, comparand))
            {
                operation.TurnIntoCopy(lhs);
            }
        }

        /// <summary>
        /// Turns a select with constant condition into a copy of the selected source.
        /// </summary>
        private static void TryEliminateConditionalSelect(Operation operation)
        {
            Operand condition = operation.GetSource(0);

            if (condition.Type != OperandType.Constant)
            {
                return;
            }

            // Source 1 is picked for a non-zero condition, source 2 for zero.
            Operand selected;

            if (condition.Value != 0)
            {
                selected = operation.GetSource(1);
            }
            else
            {
                selected = operation.GetSource(2);
            }

            operation.TurnIntoCopy(selected);
        }

        /// <summary>
        /// Checks if the operand is a constant with the given value.
        /// </summary>
        private static bool IsConstEqual(Operand operand, int comparand)
        {
            return operand.Type == OperandType.Constant && operand.Value == comparand;
        }
    }
}
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;

// File: src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Helpers shared by optimization passes.
    /// </summary>
    static class Utils
    {
        /// <summary>
        /// Looks for a predecessor that ends with a conditional branch and whose
        /// <c>Next</c> block is <paramref name="block"/>.
        /// </summary>
        /// <returns>The branch operation of the first such predecessor, or null if there is none</returns>
        private static Operation FindBranchSource(BasicBlock block)
        {
            foreach (BasicBlock sourceBlock in block.Predecessors)
            {
                if (sourceBlock.Operations.Count > 0 &&
                    sourceBlock.GetLastOp() is Operation lastOp &&
                    IsConditionalBranch(lastOp.Inst) &&
                    sourceBlock.Next == block)
                {
                    return lastOp;
                }
            }

            return null;
        }

        private static bool IsConditionalBranch(Instruction inst)
        {
            return inst == Instruction.BranchIfFalse || inst == Instruction.BranchIfTrue;
        }

        /// <summary>
        /// Checks whether both blocks are guarded by the same kind of conditional branch
        /// on the same condition operand.
        /// </summary>
        private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
        {
            // Check if all the conditions for the query block are satisfied by the current block.
            // Just checks the top-most conditional for now.

            Operation currentBranch = FindBranchSource(currentBlock);
            Operation queryBranch = FindBranchSource(queryBlock);

            if (currentBranch == null || queryBranch == null)
            {
                return false;
            }

            // The condition should be the same operand instance.

            return currentBranch.Inst == queryBranch.Inst &&
                   currentBranch.GetSource(0) == queryBranch.GetSource(0);
        }

        /// <summary>
        /// If <paramref name="source"/> is assigned by a phi node, returns the phi source coming from
        /// a block whose branch condition matches the one guarding <paramref name="block"/>.
        /// </summary>
        /// <param name="source">Operand to resolve</param>
        /// <param name="block">Block where the operand is being used</param>
        /// <returns>The matching phi source, or <paramref name="source"/> itself when nothing matches</returns>
        public static Operand FindLastOperation(Operand source, BasicBlock block)
        {
            if (source.AsgOp is PhiNode phiNode)
            {
                // This source can have a different value depending on a previous branch.
                // Ensure that conditions met for that branch are also met for the current one.
                // Prefer the latest sources for the phi node.

                for (int i = phiNode.SourcesCount - 1; i >= 0; i--)
                {
                    BasicBlock phiBlock = phiNode.GetBlock(i);

                    if (BlockConditionsMatch(block, phiBlock))
                    {
                        return phiNode.GetSource(i);
                    }
                }
            }

            return source;
        }
    }
}

// File: src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Computes which registers a function reads before writing and which ones it writes,
    /// and inserts copies between those registers and function arguments.
    /// </summary>
    static class RegisterUsage
    {
        private const int RegsCount = 256;
        private const int RegsMask = RegsCount - 1;

        // Number of 64-bit words used for each register class.
        private const int GprMasks = 4;
        private const int PredMasks = 1;
        private const int FlagMasks = 1;
        private const int TotalMasks = GprMasks + PredMasks + FlagMasks;

        /// <summary>
        /// Immutable bit set with one bit per register: four 64-bit words for general purpose
        /// registers, one word for predicates and one word for flags.
        /// </summary>
        private readonly struct RegisterMask : IEquatable<RegisterMask>
        {
            public long GprMask0 { get; }
            public long GprMask1 { get; }
            public long GprMask2 { get; }
            public long GprMask3 { get; }
            public long PredMask { get; }
            public long FlagMask { get; }

            public RegisterMask(long gprMask0, long gprMask1, long gprMask2, long gprMask3, long predMask, long flagMask)
            {
                GprMask0 = gprMask0;
                GprMask1 = gprMask1;
                GprMask2 = gprMask2;
                GprMask3 = gprMask3;
                PredMask = predMask;
                FlagMask = flagMask;
            }

            /// <summary>
            /// Gets one 64-bit word of the set: 0 to 3 are GPR words, 4 is predicates, 5 is flags.
            /// </summary>
            /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="index"/> is not in the 0 to 5 range</exception>
            public long GetMask(int index)
            {
                return index switch
                {
                    0 => GprMask0,
                    1 => GprMask1,
                    2 => GprMask2,
                    3 => GprMask3,
                    4 => PredMask,
                    5 => FlagMask,
                    _ => throw new ArgumentOutOfRangeException(nameof(index))
                };
            }

            public static RegisterMask operator &(RegisterMask x, RegisterMask y)
            {
                return new RegisterMask(
                    x.GprMask0 & y.GprMask0,
                    x.GprMask1 & y.GprMask1,
                    x.GprMask2 & y.GprMask2,
                    x.GprMask3 & y.GprMask3,
                    x.PredMask & y.PredMask,
                    x.FlagMask & y.FlagMask);
            }

            public static RegisterMask operator |(RegisterMask x, RegisterMask y)
            {
                return new RegisterMask(
                    x.GprMask0 | y.GprMask0,
                    x.GprMask1 | y.GprMask1,
                    x.GprMask2 | y.GprMask2,
                    x.GprMask3 | y.GprMask3,
                    x.PredMask | y.PredMask,
                    x.FlagMask | y.FlagMask);
            }

            public static RegisterMask operator ~(RegisterMask x)
            {
                return new RegisterMask(
                    ~x.GprMask0,
                    ~x.GprMask1,
                    ~x.GprMask2,
                    ~x.GprMask3,
                    ~x.PredMask,
                    ~x.FlagMask);
            }

            public static bool operator ==(RegisterMask x, RegisterMask y)
            {
                return x.Equals(y);
            }

            public static bool operator !=(RegisterMask x, RegisterMask y)
            {
                return !x.Equals(y);
            }

            public override bool Equals(object obj)
            {
                return obj is RegisterMask regMask && Equals(regMask);
            }

            public bool Equals(RegisterMask other)
            {
                return GprMask0 == other.GprMask0 &&
                       GprMask1 == other.GprMask1 &&
                       GprMask2 == other.GprMask2 &&
                       GprMask3 == other.GprMask3 &&
                       PredMask == other.PredMask &&
                       FlagMask == other.FlagMask;
            }

            public override int GetHashCode()
            {
                return HashCode.Combine(GprMask0, GprMask1, GprMask2, GprMask3, PredMask, FlagMask);
            }
        }

        /// <summary>
        /// Registers that a function receives as input arguments and returns as output arguments.
        /// </summary>
        public readonly struct FunctionRegisterUsage
        {
            public Register[] InArguments { get; }
            public Register[] OutArguments { get; }

            public FunctionRegisterUsage(Register[] inArguments, Register[] outArguments)
            {
                InArguments = inArguments;
                OutArguments = outArguments;
            }
        }

        /// <summary>
        /// Analyzes register reads and writes over the whole control flow graph and inserts the copies
        /// that load registers from arguments on the entry block and store them to arguments before returns.
        /// </summary>
        /// <param name="cfg">Control flow graph of the function</param>
        /// <returns>The registers used as input and output arguments by the function</returns>
        public static FunctionRegisterUsage RunPass(ControlFlowGraph cfg)
        {
            List<Register> inArguments = new List<Register>();
            List<Register> outArguments = new List<Register>();

            // Compute local register inputs and outputs used inside blocks.
            RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Length];
            RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Length];

            foreach (BasicBlock block in cfg.Blocks)
            {
                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
                {
                    // Only plain operations are inspected; any other node kind is skipped
                    // instead of being dereferenced as a null Operation.
                    if (node.Value is not Operation operation)
                    {
                        continue;
                    }

                    for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
                    {
                        Operand source = operation.GetSource(srcIndex);

                        if (source.Type != OperandType.Register)
                        {
                            continue;
                        }

                        Register register = source.GetRegister();

                        // A register only counts as a block input if it was not written earlier on the same block.
                        localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
                    }

                    if (operation.Dest != null && operation.Dest.Type == OperandType.Register)
                    {
                        localOutputs[block.Index] |= GetMask(operation.Dest.GetRegister());
                    }
                }
            }

            // Compute global register inputs and outputs used across blocks.
            RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Length];

            RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Length];
            RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Length];

            RegisterMask allOutputs = new RegisterMask();
            RegisterMask allCmnOutputs = new RegisterMask(-1L, -1L, -1L, -1L, -1L, -1L);

            bool modified;

            bool firstPass = true;

            // Iterate until no mask changes anymore.
            do
            {
                modified = false;

                // Compute register outputs.
                for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
                {
                    BasicBlock block = cfg.PostOrderBlocks[index];

                    if (block.Predecessors.Count != 0)
                    {
                        BasicBlock predecessor = block.Predecessors[0];

                        // cmnOutputs: registers written on every predecessor path (intersection).
                        // outputs: registers written on any predecessor path (union).
                        RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];

                        RegisterMask outputs = globalOutputs[predecessor.Index];

                        for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
                        {
                            predecessor = block.Predecessors[pIndex];

                            cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];

                            outputs |= globalOutputs[predecessor.Index];
                        }

                        // Registers written on only some of the incoming paths must also be inputs.
                        globalInputs[block.Index] |= outputs & ~cmnOutputs;

                        if (!firstPass)
                        {
                            cmnOutputs &= globalCmnOutputs[block.Index];
                        }

                        if (EndsWithReturn(block))
                        {
                            allCmnOutputs &= cmnOutputs | localOutputs[block.Index];
                        }

                        if (Exchange(globalCmnOutputs, block.Index, cmnOutputs))
                        {
                            modified = true;
                        }

                        outputs |= localOutputs[block.Index];

                        if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
                        {
                            allOutputs |= outputs;
                            modified = true;
                        }
                    }
                    else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index]))
                    {
                        allOutputs |= localOutputs[block.Index];
                        modified = true;
                    }
                }

                // Compute register inputs.
                for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
                {
                    BasicBlock block = cfg.PostOrderBlocks[index];

                    RegisterMask inputs = localInputs[block.Index];

                    if (block.Next != null)
                    {
                        inputs |= globalInputs[block.Next.Index];
                    }

                    if (block.Branch != null)
                    {
                        inputs |= globalInputs[block.Branch.Index];
                    }

                    inputs &= ~globalCmnOutputs[block.Index];

                    if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
                    {
                        modified = true;
                    }
                }

                firstPass = false;
            }
            while (modified);

            // Insert load and store context instructions where needed.
            foreach (BasicBlock block in cfg.Blocks)
            {
                // The only block without any predecessor should be the entry block.
                // It always needs a context load as it is the first block to run.
                if (block.Predecessors.Count == 0)
                {
                    RegisterMask inputs = globalInputs[block.Index] | (allOutputs & ~allCmnOutputs);

                    LoadLocals(block, inputs, inArguments);
                }

                if (EndsWithReturn(block))
                {
                    StoreLocals(block, allOutputs, inArguments.Count, outArguments);
                }
            }

            return new FunctionRegisterUsage(inArguments.ToArray(), outArguments.ToArray());
        }

        /// <summary>
        /// Appends the callee's input registers as sources, and its output registers as destinations,
        /// to every call operation found on the blocks.
        /// </summary>
        /// <param name="blocks">Blocks to scan for call operations</param>
        /// <param name="frus">Register usage of each function, indexed by the function ID constant of the call</param>
        public static void FixupCalls(BasicBlock[] blocks, FunctionRegisterUsage[] frus)
        {
            foreach (BasicBlock block in blocks)
            {
                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
                {
                    if (node.Value is not Operation operation || operation.Inst != Instruction.Call)
                    {
                        continue;
                    }

                    Operand funcId = operation.GetSource(0);

                    Debug.Assert(funcId.Type == OperandType.Constant);

                    var fru = frus[funcId.Value];

                    Operand[] inRegs = new Operand[fru.InArguments.Length];

                    for (int i = 0; i < fru.InArguments.Length; i++)
                    {
                        inRegs[i] = OperandHelper.Register(fru.InArguments[i]);
                    }

                    operation.AppendSources(inRegs);

                    // Slot 0 is intentionally left null; output registers start at index 1.
                    // NOTE(review): presumably slot 0 stands for the call's own result, confirm against AppendDests.
                    Operand[] outRegs = new Operand[1 + fru.OutArguments.Length];

                    for (int i = 0; i < fru.OutArguments.Length; i++)
                    {
                        outRegs[1 + i] = OperandHelper.Register(fru.OutArguments[i]);
                    }

                    operation.AppendDests(outRegs);
                }
            }
        }

        /// <summary>
        /// Checks whether the first node of the block is an operation with the given instruction.
        /// </summary>
        private static bool StartsWith(BasicBlock block, Instruction inst)
        {
            if (block.Operations.Count == 0)
            {
                return false;
            }

            return block.Operations.First.Value is Operation operation && operation.Inst == inst;
        }

        /// <summary>
        /// Checks whether the last node of the block is an operation with the given instruction.
        /// </summary>
        private static bool EndsWith(BasicBlock block, Instruction inst)
        {
            if (block.Operations.Count == 0)
            {
                return false;
            }

            return block.Operations.Last.Value is Operation operation && operation.Inst == inst;
        }

        /// <summary>
        /// Builds a mask with only the bit of the given register set.
        /// </summary>
        private static RegisterMask GetMask(Register register)
        {
            Span<long> gprMasks = stackalloc long[GprMasks];
            long predMask = 0;
            long flagMask = 0;

            switch (register.Type)
            {
                case RegisterType.Gpr:
                    // Upper bits of the index select the 64-bit word, the low 6 bits select the bit.
                    gprMasks[register.Index >> 6] = 1L << (register.Index & 0x3f);
                    break;
                case RegisterType.Predicate:
                    predMask = 1L << register.Index;
                    break;
                case RegisterType.Flag:
                    flagMask = 1L << register.Index;
                    break;
            }

            return new RegisterMask(gprMasks[0], gprMasks[1], gprMasks[2], gprMasks[3], predMask, flagMask);
        }

        /// <summary>
        /// Stores <paramref name="value"/> at <paramref name="blkIndex"/> and reports whether it differs from the previous value.
        /// </summary>
        private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
        {
            RegisterMask oldValue = masks[blkIndex];

            masks[blkIndex] = value;

            return oldValue != value;
        }

        /// <summary>
        /// Inserts, at the start of the block, one copy from a function argument to each register set on the mask.
        /// Registers are added to <paramref name="inArguments"/> only if the list was empty on entry.
        /// </summary>
        private static void LoadLocals(BasicBlock block, RegisterMask masks, List<Register> inArguments)
        {
            bool fillArgsList = inArguments.Count == 0;
            LinkedListNode<INode> node = null;
            int argIndex = 0;

            for (int i = 0; i < TotalMasks; i++)
            {
                (RegisterType regType, int baseRegIndex) = GetRegTypeAndBaseIndex(i);
                long mask = masks.GetMask(i);

                while (mask != 0)
                {
                    int bit = BitOperations.TrailingZeroCount(mask);

                    mask &= ~(1L << bit);

                    Register register = new Register(baseRegIndex + bit, regType);

                    if (fillArgsList)
                    {
                        inArguments.Add(register);
                    }

                    Operation copyOp = new Operation(Instruction.Copy, OperandHelper.Register(register), OperandHelper.Argument(argIndex++));

                    // Keep the copies in ascending argument order at the top of the block.
                    if (node == null)
                    {
                        node = block.Operations.AddFirst(copyOp);
                    }
                    else
                    {
                        node = block.Operations.AddAfter(node, copyOp);
                    }
                }
            }

            Debug.Assert(argIndex <= inArguments.Count);
        }

        /// <summary>
        /// Inserts, right before the last operation of the block, one copy from each register set on the mask
        /// to a function argument. Argument indices start at <paramref name="inArgumentsCount"/>.
        /// Registers are added to <paramref name="outArguments"/> only if the list was empty on entry.
        /// </summary>
        private static void StoreLocals(BasicBlock block, RegisterMask masks, int inArgumentsCount, List<Register> outArguments)
        {
            LinkedListNode<INode> node = null;
            int argIndex = inArgumentsCount;
            bool fillArgsList = outArguments.Count == 0;

            for (int i = 0; i < TotalMasks; i++)
            {
                (RegisterType regType, int baseRegIndex) = GetRegTypeAndBaseIndex(i);
                long mask = masks.GetMask(i);

                while (mask != 0)
                {
                    int bit = BitOperations.TrailingZeroCount(mask);

                    mask &= ~(1L << bit);

                    Register register = new Register(baseRegIndex + bit, regType);

                    if (fillArgsList)
                    {
                        outArguments.Add(register);
                    }

                    Operation copyOp = new Operation(Instruction.Copy, OperandHelper.Argument(argIndex++), OperandHelper.Register(register));

                    if (node == null)
                    {
                        node = block.Operations.AddBefore(block.Operations.Last, copyOp);
                    }
                    else
                    {
                        node = block.Operations.AddAfter(node, copyOp);
                    }
                }
            }

            Debug.Assert(argIndex <= inArgumentsCount + outArguments.Count);
        }

        /// <summary>
        /// Maps a mask word index to its register type and to the register index of bit 0 of that word.
        /// </summary>
        private static (RegisterType RegType, int BaseRegIndex) GetRegTypeAndBaseIndex(int i)
        {
            RegisterType regType = RegisterType.Gpr;
            int baseRegIndex = 0;

            if (i < GprMasks)
            {
                // Each GPR word covers 64 consecutive registers.
                baseRegIndex = i * sizeof(long) * 8;
            }
            else if (i == GprMasks)
            {
                regType = RegisterType.Predicate;
            }
            else
            {
                regType = RegisterType.Flag;
            }

            return (regType, baseRegIndex);
        }

        /// <summary>
        /// Checks whether the last operation of the block is a return.
        /// </summary>
        private static bool EndsWithReturn(BasicBlock block)
        {
            return block.GetLastOp() is Operation operation && operation.Inst == Instruction.Return;
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs new file mode 100644 index 00000000..91e7ace1 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -0,0 +1,768 @@ +using Ryujinx.Graphics.Shader.Decoders; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Numerics; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; +using static Ryujinx.Graphics.Shader.Translation.GlobalMemory; + +namespace Ryujinx.Graphics.Shader.Translation +{ + static class Rewriter + { + public static void RunPass(BasicBlock[] blocks, ShaderConfig config) + { + bool isVertexShader = config.Stage == ShaderStage.Vertex; + bool hasConstantBufferDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters(); + bool supportsSnormBufferTextureFormat = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat(); + + for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) + { + BasicBlock block = blocks[blkIndex]; + + for (LinkedListNode<INode> node = block.Operations.First; node != null;) + { + if (node.Value is not Operation operation) + { + node = node.Next; + continue; + } + + if (isVertexShader) + { + if (hasConstantBufferDrawParameters) + { + if (ReplaceConstantBufferWithDrawParameters(node, operation)) + { + config.SetUsedFeature(FeatureFlags.DrawParameters); + } + } + else if (HasConstantBufferDrawParameters(operation)) + { + config.SetUsedFeature(FeatureFlags.DrawParameters); + } + } + + LinkedListNode<INode> nextNode = node.Next; + + if (operation is TextureOperation texOp) + { + if (texOp.Inst == Instruction.TextureSample) + { + node = RewriteTextureSample(node, config); + + if (texOp.Type == SamplerType.TextureBuffer && !supportsSnormBufferTextureFormat) + { + node = InsertSnormNormalization(node, config); + } + } + + 
nextNode = node.Next; + } + else if (UsesGlobalMemory(operation.Inst, operation.StorageKind)) + { + nextNode = RewriteGlobalAccess(node, config)?.Next ?? nextNode; + } + + node = nextNode; + } + } + } + + private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config) + { + Operation operation = (Operation)node.Value; + + bool isAtomic = operation.Inst.IsAtomic(); + bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8; + bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8; + + Operation storageOp = null; + + Operand PrependOperation(Instruction inst, params Operand[] sources) + { + Operand local = Local(); + + node.List.AddBefore(node, new Operation(inst, local, sources)); + + return local; + } + + Operand PrependExistingOperation(Operation operation) + { + Operand local = Local(); + + operation.Dest = local; + node.List.AddBefore(node, operation); + + return local; + } + + Operand addrLow = operation.GetSource(0); + Operand addrHigh = operation.GetSource(1); + + Operand sbBaseAddrLow = Const(0); + Operand sbSlot = Const(0); + + Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); + + Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow) + { + baseAddrLow = Cbuf(0, cbOffset); + Operand baseAddrHigh = Cbuf(0, cbOffset + 1); + Operand size = Cbuf(0, cbOffset + 2); + + Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow); + Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow); + + Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size); + + Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow); + + Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh); + + return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh); + } + + int sbUseMask = 
config.AccessibleStorageBuffersMask; + + while (sbUseMask != 0) + { + int slot = BitOperations.TrailingZeroCount(sbUseMask); + + sbUseMask &= ~(1 << slot); + + config.SetUsedStorageBuffer(slot, isWrite); + + int cbOffset = GetStorageCbOffset(config.Stage, slot); + + Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); + + sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow); + sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot); + } + + if (config.AccessibleStorageBuffersMask != 0) + { + Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask); + Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc); + + Operand[] sources = new Operand[operation.SourcesCount]; + + sources[0] = sbSlot; + + if (isStg16Or8) + { + sources[1] = byteOffset; + } + else + { + sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2)); + } + + for (int index = 2; index < operation.SourcesCount; index++) + { + sources[index] = operation.GetSource(index); + } + + if (isAtomic) + { + storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); + } + else if (operation.Inst == Instruction.LoadGlobal) + { + storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources); + } + else + { + Instruction storeInst = operation.Inst switch + { + Instruction.StoreGlobal16 => Instruction.StoreStorage16, + Instruction.StoreGlobal8 => Instruction.StoreStorage8, + _ => Instruction.StoreStorage + }; + + storageOp = new Operation(storeInst, null, sources); + } + } + else if (operation.Dest != null) + { + storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0)); + } + + if (operation.Inst == Instruction.LoadGlobal) + { + int cbeUseMask = config.AccessibleConstantBuffersMask; + + while (cbeUseMask != 0) + { + int slot = BitOperations.TrailingZeroCount(cbeUseMask); + int 
cbSlot = UbeFirstCbuf + slot; + + cbeUseMask &= ~(1 << slot); + + config.SetUsedConstantBuffer(cbSlot); + + Operand previousResult = PrependExistingOperation(storageOp); + + int cbOffset = GetConstantUbeOffset(slot); + + Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); + + Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask); + Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst); + + Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2)); + + Operand[] sourcesCb = new Operand[operation.SourcesCount]; + + sourcesCb[0] = Const(cbSlot); + sourcesCb[1] = cbIndex; + + for (int index = 2; index < operation.SourcesCount; index++) + { + sourcesCb[index] = operation.GetSource(index); + } + + Operand ldcResult = PrependOperation(Instruction.LoadConstant, sourcesCb); + + storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult); + } + } + + for (int index = 0; index < operation.SourcesCount; index++) + { + operation.SetSource(index, null); + } + + LinkedListNode<INode> oldNode = node; + LinkedList<INode> oldNodeList = oldNode.List; + + if (storageOp != null) + { + node = node.List.AddBefore(node, storageOp); + } + else + { + node = null; + } + + oldNodeList.Remove(oldNode); + + return node; + } + + private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config) + { + TextureOperation texOp = (TextureOperation)node.Value; + + bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; + bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; + + bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset(); + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, 
texOp.CbufSlot); + + if (!hasInvalidOffset && isCoordNormalized) + { + return node; + } + + bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; + bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0; + bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; + bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0; + bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0; + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0; + bool isShadow = (texOp.Type & SamplerType.Shadow) != 0; + + int coordsCount = texOp.Type.GetDimensions(); + + int offsetsCount; + + if (hasOffsets) + { + offsetsCount = coordsCount * 4; + } + else if (hasOffset) + { + offsetsCount = coordsCount; + } + else + { + offsetsCount = 0; + } + + Operand[] offsets = new Operand[offsetsCount]; + Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount]; + + int copyCount = 0; + + if (isBindless || isIndexed) + { + copyCount++; + } + + Operand[] lodSources = new Operand[copyCount + coordsCount]; + + for (int index = 0; index < lodSources.Length; index++) + { + lodSources[index] = texOp.GetSource(index); + } + + copyCount += coordsCount; + + if (isArray) + { + copyCount++; + } + + if (isShadow) + { + copyCount++; + } + + if (hasDerivatives) + { + copyCount += coordsCount * 2; + } + + if (isMultisample) + { + copyCount++; + } + else if (hasLodLevel) + { + copyCount++; + } + + int srcIndex = 0; + int dstIndex = 0; + + for (int index = 0; index < copyCount; index++) + { + sources[dstIndex++] = texOp.GetSource(srcIndex++); + } + + bool areAllOffsetsConstant = true; + + for (int index = 0; index < offsetsCount; index++) + { + Operand offset = texOp.GetSource(srcIndex++); + + areAllOffsetsConstant &= offset.Type == OperandType.Constant; + + offsets[index] = offset; + } + + hasInvalidOffset &= !areAllOffsetsConstant; + + if 
(!hasInvalidOffset && isCoordNormalized) + { + return node; + } + + if (hasLodBias) + { + sources[dstIndex++] = texOp.GetSource(srcIndex++); + } + + if (isGather && !isShadow) + { + sources[dstIndex++] = texOp.GetSource(srcIndex++); + } + + int coordsIndex = isBindless || isIndexed ? 1 : 0; + + int componentIndex = texOp.Index; + + Operand Float(Operand value) + { + Operand res = Local(); + + node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, res, value)); + + return res; + } + + // Emulate non-normalized coordinates by normalizing the coordinates on the shader. + // Without normalization, the coordinates are expected to the in the [0, W or H] range, + // and otherwise, it is expected to be in the [0, 1] range. + // We normalize by dividing the coords by the texture size. + if (!isCoordNormalized && !intCoords) + { + config.SetUsedFeature(FeatureFlags.IntegerSampling); + + int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount; + + for (int index = 0; index < normCoordsCount; index++) + { + Operand coordSize = Local(); + + Operand[] texSizeSources; + + if (isBindless || isIndexed) + { + texSizeSources = new Operand[] { sources[0], Const(0) }; + } + else + { + texSizeSources = new Operand[] { Const(0) }; + } + + node.List.AddBefore(node, new TextureOperation( + Instruction.TextureSize, + texOp.Type, + texOp.Format, + texOp.Flags, + texOp.CbufSlot, + texOp.Handle, + index, + new[] { coordSize }, + texSizeSources)); + + config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle); + + Operand source = sources[coordsIndex + index]; + + Operand coordNormalized = Local(); + + node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, Float(coordSize))); + + sources[coordsIndex + index] = coordNormalized; + } + } + + Operand[] dests = new Operand[texOp.DestsCount]; + + for (int i = 0; i < texOp.DestsCount; i++) 
+ { + dests[i] = texOp.GetDest(i); + } + + Operand bindlessHandle = isBindless || isIndexed ? sources[0] : null; + + LinkedListNode<INode> oldNode = node; + + // Technically, non-constant texture offsets are not allowed (according to the spec), + // however some GPUs does support that. + // For GPUs where it is not supported, we can replace the instruction with the following: + // For texture*Offset, we replace it by texture*, and add the offset to the P coords. + // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords). + // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly. + // For textureGatherOffset, we split the operation into up to 4 operations, one for each component + // that is accessed, where each textureGather operation has a different offset for each pixel. + if (hasInvalidOffset && isGather && !isShadow) + { + config.SetUsedFeature(FeatureFlags.IntegerSampling); + + Operand[] newSources = new Operand[sources.Length]; + + sources.CopyTo(newSources, 0); + + Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount); + + int destIndex = 0; + + for (int compIndex = 0; compIndex < 4; compIndex++) + { + if (((texOp.Index >> compIndex) & 1) == 0) + { + continue; + } + + for (int index = 0; index < coordsCount; index++) + { + config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle); + + Operand offset = Local(); + + Operand intOffset = offsets[index + (hasOffsets ? 
compIndex * coordsCount : 0)]; + + node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(texSizes[index]))); + + Operand source = sources[coordsIndex + index]; + + Operand coordPlusOffset = Local(); + + node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset)); + + newSources[coordsIndex + index] = coordPlusOffset; + } + + TextureOperation newTexOp = new TextureOperation( + Instruction.TextureSample, + texOp.Type, + texOp.Format, + texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets), + texOp.CbufSlot, + texOp.Handle, + 1, + new[] { dests[destIndex++] }, + newSources); + + node = node.List.AddBefore(node, newTexOp); + } + } + else + { + if (hasInvalidOffset) + { + if (intCoords) + { + for (int index = 0; index < coordsCount; index++) + { + Operand source = sources[coordsIndex + index]; + + Operand coordPlusOffset = Local(); + + node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index])); + + sources[coordsIndex + index] = coordPlusOffset; + } + } + else + { + config.SetUsedFeature(FeatureFlags.IntegerSampling); + + Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount); + + for (int index = 0; index < coordsCount; index++) + { + config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle); + + Operand offset = Local(); + + Operand intOffset = offsets[index]; + + node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(texSizes[index]))); + + Operand source = sources[coordsIndex + index]; + + Operand coordPlusOffset = Local(); + + node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset)); + + sources[coordsIndex + index] = coordPlusOffset; + } + } + } + + TextureOperation newTexOp = new TextureOperation( + 
Instruction.TextureSample,
                    texOp.Type,
                    texOp.Format,
                    texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
                    texOp.CbufSlot,
                    texOp.Handle,
                    componentIndex,
                    dests,
                    sources);

                node = node.List.AddBefore(node, newTexOp);
            }

            node.List.Remove(oldNode);

            for (int index = 0; index < texOp.SourcesCount; index++)
            {
                texOp.SetSource(index, null);
            }

            return node;
        }

        /// <summary>
        /// Inserts, before <paramref name="node"/>, one <see cref="Instruction.Lod"/> texture operation
        /// followed by one <see cref="Instruction.TextureSize"/> operation per coordinate.
        /// </summary>
        /// <param name="node">Node before which all new operations are inserted</param>
        /// <param name="texOp">Texture operation whose type, format, flags, slot and handle are reused</param>
        /// <param name="lodSources">Sources passed to the inserted Lod operation</param>
        /// <param name="bindlessHandle">Optional handle placed first in the TextureSize sources; may be null</param>
        /// <param name="coordsCount">Number of TextureSize operations (and returned operands) to produce</param>
        /// <returns>Destination operands of the inserted TextureSize operations, indexed by component</returns>
        private static Operand[] InsertTextureSize(
            LinkedListNode<INode> node,
            TextureOperation texOp,
            Operand[] lodSources,
            Operand bindlessHandle,
            int coordsCount)
        {
            // Inserts a FP32 -> S32 conversion of the value before the node and returns its result.
            Operand ToInt32(Operand value)
            {
                Operand converted = Local();

                node.List.AddBefore(node, new Operation(Instruction.ConvertFP32ToS32, converted, value));

                return converted;
            }

            Operand[] sizes = new Operand[coordsCount];

            Operand lod = Local();

            TextureOperation lodOp = new TextureOperation(
                Instruction.Lod,
                texOp.Type,
                texOp.Format,
                texOp.Flags,
                texOp.CbufSlot,
                texOp.Handle,
                0,
                new[] { lod },
                lodSources);

            node.List.AddBefore(node, lodOp);

            for (int component = 0; component < coordsCount; component++)
            {
                Operand size = Local();

                sizes[component] = size;

                // A new conversion of the LOD is emitted for every component.
                Operand[] sizeSources = bindlessHandle != null
                    ? new Operand[] { bindlessHandle, ToInt32(lod) }
                    : new Operand[] { ToInt32(lod) };

                TextureOperation sizeOp = new TextureOperation(
                    Instruction.TextureSize,
                    texOp.Type,
                    texOp.Format,
                    texOp.Flags,
                    texOp.CbufSlot,
                    texOp.Handle,
                    component,
                    new[] { size },
                    sizeSources);

                node.List.AddBefore(node, sizeOp);
            }

            return sizes;
        }

        /// <summary>
        /// For non-bindless texture operations whose queried format is one of the listed SNORM formats,
        /// appends a conversion to FP32 and a multiplication by 1 / max positive value after the node
        /// for each destination, and redirects existing uses of the destination to the normalized value.
        /// </summary>
        /// <param name="node">Node holding the texture operation</param>
        /// <param name="config">Shader configuration used to query the texture format</param>
        /// <returns>The last inserted node, or <paramref name="node"/> if nothing was inserted</returns>
        private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
        {
            TextureOperation texOp = (TextureOperation)node.Value;

            // We can't query the format of a bindless texture,
            // because the handle is unknown, it can have any format.
            if (texOp.Flags.HasFlag(TextureFlags.Bindless))
            {
                return node;
            }

            TextureFormat format = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot);

            int maxPositive;

            switch (format)
            {
                case TextureFormat.R8Snorm:
                case TextureFormat.R8G8Snorm:
                case TextureFormat.R8G8B8A8Snorm:
                    maxPositive = sbyte.MaxValue;
                    break;
                case TextureFormat.R16Snorm:
                case TextureFormat.R16G16Snorm:
                case TextureFormat.R16G16B16A16Snorm:
                    maxPositive = short.MaxValue;
                    break;
                default:
                    // Not a SNORM format, so there's nothing to do here.
                    return node;
            }

            // Do normalization. We assume SINT formats are being used
            // as replacement for SNORM (which is not supported).
            for (int destIndex = 0; destIndex < texOp.DestsCount; destIndex++)
            {
                Operand texel = texOp.GetDest(destIndex);

                // Snapshot the uses before the conversion below adds itself as a new use.
                INode[] previousUses = texel.UseOps.ToArray();

                Operation toFloat = new Operation(Instruction.ConvertS32ToFP32, Local(), texel);
                Operation scale = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), toFloat.Dest, ConstF(1f / maxPositive));

                node = node.List.AddAfter(node, toFloat);
                node = node.List.AddAfter(node, scale);

                foreach (INode use in previousUses)
                {
                    if (use is Operation useOperation)
                    {
                        // Replace all uses of the texture pixel value with the normalized value.
                        for (int srcIndex = 0; srcIndex < useOperation.SourcesCount; srcIndex++)
                        {
                            if (useOperation.GetSource(srcIndex) == texel)
                            {
                                useOperation.SetSource(srcIndex, scale.Dest);
                            }
                        }
                    }
                }
            }

            return node;
        }

        /// <summary>
        /// Replaces sources of <paramref name="operation"/> that read constant buffer slot 0 at the
        /// base vertex, base instance or draw index offsets with input loads of the matching
        /// <see cref="IoVariable"/>, inserted before <paramref name="node"/>.
        /// </summary>
        /// <param name="node">Node before which the load operations are inserted</param>
        /// <param name="operation">Operation whose sources are inspected and possibly replaced</param>
        /// <returns>True if at least one source was replaced, false otherwise</returns>
        private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
        {
            // Inserts a load of the given input variable before the node and returns the loaded value.
            Operand LoadInput(IoVariable ioVariable)
            {
                Operand loaded = Local();
                node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, loaded, Const((int)ioVariable)));
                return loaded;
            }

            // Maps a constant buffer offset to the draw parameter it holds, if any.
            static bool TryGetDrawParameter(int cbufOffset, out IoVariable ioVariable)
            {
                switch (cbufOffset)
                {
                    case Constants.NvnBaseVertexByteOffset / 4:
                        ioVariable = IoVariable.BaseVertex;
                        return true;
                    case Constants.NvnBaseInstanceByteOffset / 4:
                        ioVariable = IoVariable.BaseInstance;
                        return true;
                    case Constants.NvnDrawIndexByteOffset / 4:
                        ioVariable = IoVariable.DrawIndex;
                        return true;
                    default:
                        ioVariable = default;
                        return false;
                }
            }

            bool anyReplaced = false;

            for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
            {
                Operand src = operation.GetSource(srcIndex);

                if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
                {
                    continue;
                }

                if (TryGetDrawParameter(src.GetCbufOffset(), out IoVariable drawParameter))
                {
                    operation.SetSource(srcIndex, LoadInput(drawParameter));
                    anyReplaced = true;
                }
            }

            return anyReplaced;
        }

        /// <summary>
        /// Checks whether any source of <paramref name="operation"/> reads constant buffer slot 0
        /// at the base vertex, base instance or draw index offsets.
        /// </summary>
        /// <param name="operation">Operation whose sources are inspected</param>
        /// <returns>True if such a source exists, false otherwise</returns>
        private static bool HasConstantBufferDrawParameters(Operation operation)
        {
            for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
            {
                Operand src = operation.GetSource(srcIndex);

                if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
                {
                    continue;
                }

                int cbufOffset = src.GetCbufOffset();

                if (cbufOffset == Constants.NvnBaseVertexByteOffset / 4 ||
                    cbufOffset == Constants.NvnBaseInstanceByteOffset / 4 ||
                    cbufOffset == Constants.NvnDrawIndexByteOffset / 4)
                {
                    return true;
                }
            }

            return false;
        }
    }
}
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
// new file mode 100644, index 00000000..22f5a671, @@ -0,0 +1,944 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Holds the per-shader state gathered from the shader header and the GPU accessor,
    /// plus the resource, attribute and feature usage recorded through the Set* methods.
    /// </summary>
    class ShaderConfig
    {
        // TODO: Non-hardcoded array size.
        public const int SamplerArraySize = 4;

        private const int ThreadsPerWarp = 32;

        public ShaderStage Stage { get; }

        public bool GpPassthrough { get; }
        public bool LastInVertexPipeline { get; private set; }

        public bool HasLayerInputAttribute { get; private set; }
        public int GpLayerInputAttribute { get; private set; }
        public int ThreadsPerInputPrimitive { get; }

        public OutputTopology OutputTopology { get; }

        public int MaxOutputVertices { get; }

        public int LocalMemorySize { get; }

        public ImapPixelType[] ImapTypes { get; }

        public int OmapTargets { get; }
        public bool OmapSampleMask { get; }
        public bool OmapDepth { get; }

        public IGpuAccessor GpuAccessor { get; }

        public TranslationOptions Options { get; }

        public bool TransformFeedbackEnabled { get; }

        // Indexed by attribute word offset; created lazily by EnsureTransformFeedbackInitialized.
        private TransformFeedbackOutput[] _transformFeedbackOutputs;

        /// <summary>
        /// Dictionary key identifying a transform feedback variable by I/O variable, location and component.
        /// </summary>
        readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
        {
            public IoVariable IoVariable { get; }
            public int Location { get; }
            public int Component { get; }

            public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
            {
                IoVariable = ioVariable;
                Location = location;
                Component = component;
            }

            public override bool Equals(object other)
            {
                return other is TransformFeedbackVariable tfbVar && Equals(tfbVar);
            }

            public bool Equals(TransformFeedbackVariable other)
            {
                return IoVariable == other.IoVariable &&
                    Location == other.Location &&
                    Component == other.Component;
            }

            public override int GetHashCode()
            {
                return (int)IoVariable | (Location << 8) | (Component << 16);
            }

            public override string ToString()
            {
                return $"{IoVariable}.{Location}.{Component}";
            }
        }

        private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;

        public int Size { get; private set; }

        public byte ClipDistancesWritten { get; private set; }

        public FeatureFlags UsedFeatures { get; private set; }

        public int Cb1DataSize { get; private set; }

        public bool LayerOutputWritten { get; private set; }
        public int LayerOutputAttribute { get; private set; }

        public bool NextUsesFixedFuncAttributes { get; private set; }
        public int UsedInputAttributes { get; private set; }
        public int UsedOutputAttributes { get; private set; }
        public HashSet<int> UsedInputAttributesPerPatch { get; }
        public HashSet<int> UsedOutputAttributesPerPatch { get; }
        public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }
        public int PassthroughAttributes { get; private set; }
        private int _nextUsedInputAttributes;
        private int _thisUsedInputAttributes;
        private Dictionary<int, int> _perPatchAttributeLocations;

        public UInt128 NextInputAttributesComponents { get; private set; }
        public UInt128 ThisInputAttributesComponents { get; private set; }

        public int AccessibleStorageBuffersMask { get; private set; }
        public int AccessibleConstantBuffersMask { get; private set; }

        private int _usedConstantBuffers;
        private int _usedStorageBuffers;
        private int _usedStorageBuffersWrite;

        private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);

        private struct TextureMeta
        {
            public bool AccurateType;
            public SamplerType Type;
            public TextureUsageFlags UsageFlags;
        }

        private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
        private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;

        private BufferDescriptor[] _cachedConstantBufferDescriptors;
        private BufferDescriptor[] _cachedStorageBufferDescriptors;
        private TextureDescriptor[] _cachedTextureDescriptors;
        private TextureDescriptor[] _cachedImageDescriptors;

        private int _firstConstantBufferBinding;
        private int _firstStorageBufferBinding;

        public int FirstConstantBufferBinding => _firstConstantBufferBinding;
        public int FirstStorageBufferBinding => _firstStorageBufferBinding;

        /// <summary>
        /// Creates a configuration with the stage set to compute and all usage tracking empty.
        /// The other constructors chain to this one and then overwrite the stage.
        /// </summary>
        /// <param name="gpuAccessor">GPU accessor used for all state queries</param>
        /// <param name="options">Translation options</param>
        public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options)
        {
            Stage = ShaderStage.Compute;
            GpuAccessor = gpuAccessor;
            Options = options;

            _transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>();

            AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1;
            AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1;

            UsedInputAttributesPerPatch = new HashSet<int>();
            UsedOutputAttributesPerPatch = new HashSet<int>();

            _usedTextures = new Dictionary<TextureInfo, TextureMeta>();
            _usedImages = new Dictionary<TextureInfo, TextureMeta>();
        }

        /// <summary>
        /// Creates a configuration for an explicitly specified stage, without a shader header.
        /// </summary>
        /// <param name="stage">Shader stage</param>
        /// <param name="outputTopology">Output topology</param>
        /// <param name="maxOutputVertices">Maximum number of output vertices</param>
        /// <param name="gpuAccessor">GPU accessor used for all state queries</param>
        /// <param name="options">Translation options</param>
        public ShaderConfig(
            ShaderStage stage,
            OutputTopology outputTopology,
            int maxOutputVertices,
            IGpuAccessor gpuAccessor,
            TranslationOptions options) : this(gpuAccessor, options)
        {
            Stage = stage;
            ThreadsPerInputPrimitive = 1;
            OutputTopology = outputTopology;
            MaxOutputVertices = maxOutputVertices;
            TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();

            if (Stage != ShaderStage.Compute)
            {
                AccessibleConstantBuffersMask = 0;
            }
        }

        /// <summary>
        /// Creates a configuration from a decoded shader header.
        /// </summary>
        /// <param name="header">Shader header the stage and layout information is copied from</param>
        /// <param name="gpuAccessor">GPU accessor used for all state queries</param>
        /// <param name="options">Translation options</param>
        public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options)
        {
            Stage = header.Stage;
            GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough;
            ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
            OutputTopology = header.OutputTopology;
            MaxOutputVertices = header.MaxOutputVertexCount;
            LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp);
            ImapTypes = header.ImapTypes;
            OmapTargets = header.OmapTargets;
            OmapSampleMask = header.OmapSampleMask;
            OmapDepth = header.OmapDepth;
            TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
            LastInVertexPipeline = header.Stage < ShaderStage.Fragment;
        }

        /// <summary>
        /// Builds the transform feedback output table and the variable lookup dictionary on first use.
        /// Does nothing when the shader has no transform feedback outputs or the table already exists.
        /// </summary>
        private void EnsureTransformFeedbackInitialized()
        {
            if (HasTransformFeedbackOutputs() && _transformFeedbackOutputs == null)
            {
                TransformFeedbackOutput[] transformFeedbackOutputs = new TransformFeedbackOutput[0xc0];

                // One bit per group of 4 words that has at least one output assigned.
                ulong vecMap = 0UL;

                for (int tfbIndex = 0; tfbIndex < 4; tfbIndex++)
                {
                    var locations = GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex);
                    var stride = GpuAccessor.QueryTransformFeedbackStride(tfbIndex);

                    for (int i = 0; i < locations.Length; i++)
                    {
                        byte wordOffset = locations[i];
                        if (wordOffset < 0xc0)
                        {
                            transformFeedbackOutputs[wordOffset] = new TransformFeedbackOutput(tfbIndex, i * 4, stride);
                            vecMap |= 1UL << (wordOffset / 4);
                        }
                    }
                }

                // Must be assigned before the loop below, which reads it back through
                // HasPerLocationInputOrOutputComponent -> GetTransformFeedbackOutputComponents.
                _transformFeedbackOutputs = transformFeedbackOutputs;

                while (vecMap != 0)
                {
                    int vecIndex = BitOperations.TrailingZeroCount(vecMap);

                    for (int subIndex = 0; subIndex < 4; subIndex++)
                    {
                        int wordOffset = vecIndex * 4 + subIndex;
                        int byteOffset = wordOffset * 4;

                        if (transformFeedbackOutputs[wordOffset].Valid)
                        {
                            IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, byteOffset, out int location);
                            int component = 0;

                            if (HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true))
                            {
                                component = subIndex;
                            }

                            var transformFeedbackVariable = new TransformFeedbackVariable(ioVariable, location, component);
                            _transformFeedbackDefinitions.TryAdd(transformFeedbackVariable, transformFeedbackOutputs[wordOffset]);
                        }
                    }

                    vecMap &= ~(1UL << vecIndex);
                }
            }
        }

        /// <summary>
        /// Gets the transform feedback output table, building it first if needed.
        /// </summary>
        /// <returns>The table, or null if it was never built because the shader has no transform feedback outputs</returns>
        public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
        {
            EnsureTransformFeedbackInitialized();
            return _transformFeedbackOutputs;
        }

        /// <summary>
        /// Looks up the transform feedback output registered for a variable, location and component.
        /// </summary>
        /// <returns>True if an output was registered for that combination, false otherwise</returns>
        public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
        {
            EnsureTransformFeedbackInitialized();
            var transformFeedbackVariable = new TransformFeedbackVariable(ioVariable, location, component);
            return _transformFeedbackDefinitions.TryGetValue(transformFeedbackVariable, out transformFeedbackOutput);
        }

        private bool HasTransformFeedbackOutputs()
        {
            return TransformFeedbackEnabled && (LastInVertexPipeline || Stage == ShaderStage.Fragment);
        }

        /// <summary>
        /// Checks if transform feedback applies to the outputs of the last vertex pipeline stage,
        /// or to the inputs of the fragment stage.
        /// </summary>
        /// <param name="isOutput">True to check outputs, false to check inputs</param>
        public bool HasTransformFeedbackOutputs(bool isOutput)
        {
            return TransformFeedbackEnabled && ((isOutput && LastInVertexPipeline) || (!isOutput && Stage == ShaderStage.Fragment));
        }

        public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
        {
            if (ioVariable == IoVariable.UserDefined)
            {
                return (!isOutput && !UsedFeatures.HasFlag(FeatureFlags.IaIndexing)) ||
                       (isOutput && !UsedFeatures.HasFlag(FeatureFlags.OaIndexing));
            }

            return ioVariable == IoVariable.FragmentOutputColor;
        }

        public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput)
        {
            if (ioVariable != IoVariable.UserDefined || !HasTransformFeedbackOutputs(isOutput))
            {
                return false;
            }

            return GetTransformFeedbackOutputComponents(location, component) == 1;
        }

        /// <summary>
        /// Gets the transform feedback output at the given word offset.
        /// The table is not null-checked here, so this is only valid when the shader has transform feedback outputs.
        /// </summary>
        public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset)
        {
            EnsureTransformFeedbackInitialized();

            return _transformFeedbackOutputs[wordOffset];
        }

        public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
        {
            return GetTransformFeedbackOutput((AttributeConsts.UserAttributeBase / 4) + location * 4 + component);
        }

        /// <summary>
        /// Counts how many components of a user attribute location, starting from the first,
        /// are valid outputs at consecutive 4-byte offsets.
        /// </summary>
        /// <param name="location">User attribute location</param>
        /// <param name="component">Component being queried</param>
        /// <returns>The length of that leading run, or 1 if <paramref name="component"/> lies past it</returns>
        public int GetTransformFeedbackOutputComponents(int location, int component)
        {
            EnsureTransformFeedbackInitialized();

            int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4;
            int index = baseIndex + component;
            int count = 1;

            for (; count < 4; count++)
            {
                ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1];
                ref var curr = ref _transformFeedbackOutputs[baseIndex + count];

                int prevOffset = prev.Offset;
                int currOffset = curr.Offset;

                if (!prev.Valid || !curr.Valid || prevOffset + 4 != currOffset)
                {
                    break;
                }
            }

            if (baseIndex + count <= index)
            {
                return 1;
            }

            return count;
        }

        public AggregateType GetFragmentOutputColorType(int location)
        {
            return AggregateType.Vector4 | GpuAccessor.QueryFragmentOutputType(location).ToAggregateType();
        }

        /// <summary>
        /// Gets the type used for a user defined attribute: an array of FP32 vec4 when indexing is used
        /// for that direction, the queried attribute type for vertex inputs, FP32 vec4 otherwise.
        /// </summary>
        public AggregateType GetUserDefinedType(int location, bool isOutput)
        {
            if ((!isOutput && UsedFeatures.HasFlag(FeatureFlags.IaIndexing)) ||
                (isOutput && UsedFeatures.HasFlag(FeatureFlags.OaIndexing)))
            {
                return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
            }

            AggregateType type = AggregateType.Vector4;

            if (Stage == ShaderStage.Vertex && !isOutput)
            {
                type |= GpuAccessor.QueryAttributeType(location).ToAggregateType();
            }
            else
            {
                type |= AggregateType.FP32;
            }

            return type;
        }

        public int GetDepthRegister()
        {
            // The depth register is always two registers after the last color output.
            return BitOperations.PopCount((uint)OmapTargets) + 1;
        }

        /// <summary>
        /// Reads a word from constant buffer 1 through the GPU accessor,
        /// growing <see cref="Cb1DataSize"/> so that it covers the 4 bytes read.
        /// </summary>
        public uint ConstantBuffer1Read(int offset)
        {
            if (Cb1DataSize < offset + 4)
            {
                Cb1DataSize = offset + 4;
            }

            return GpuAccessor.ConstantBuffer1Read(offset);
        }

        public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1)
        {
            // When the formatted load extension is supported, we don't need to
            // specify a format, we can just declare it without a format and the GPU will handle it.
            if (GpuAccessor.QueryHostSupportsImageLoadFormatted())
            {
                return TextureFormat.Unknown;
            }

            var format = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

            if (format == TextureFormat.Unknown)
            {
                GpuAccessor.Log($"Unknown format for texture {handle}.");

                format = TextureFormat.R8G8B8A8Unorm;
            }

            return format;
        }

        private static bool FormatSupportsAtomic(TextureFormat format)
        {
            return format == TextureFormat.R32Sint || format == TextureFormat.R32Uint;
        }

        public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
        {
            // Atomic image instructions do not support GL_EXT_shader_image_load_formatted,
            // and must have a type specified. Default to R32Sint if not available.

            var format = GpuAccessor.QueryTextureFormat(handle, cbufSlot);

            if (!FormatSupportsAtomic(format))
            {
                GpuAccessor.Log($"Unsupported format for texture {handle}: {format}.");

                format = TextureFormat.R32Sint;
            }

            return format;
        }

        public void SizeAdd(int size)
        {
            Size += size;
        }

        /// <summary>
        /// Merges the clip distance, feature, attribute, buffer, texture and image usage
        /// recorded on another configuration into this one.
        /// </summary>
        /// <param name="other">Configuration to merge from</param>
        public void InheritFrom(ShaderConfig other)
        {
            ClipDistancesWritten |= other.ClipDistancesWritten;
            UsedFeatures |= other.UsedFeatures;

            UsedInputAttributes |= other.UsedInputAttributes;
            UsedOutputAttributes |= other.UsedOutputAttributes;
            _usedConstantBuffers |= other._usedConstantBuffers;
            _usedStorageBuffers |= other._usedStorageBuffers;
            _usedStorageBuffersWrite |= other._usedStorageBuffersWrite;

            foreach (var kv in other._usedTextures)
            {
                if (!_usedTextures.TryAdd(kv.Key, kv.Value))
                {
                    _usedTextures[kv.Key] = MergeTextureMeta(kv.Value, _usedTextures[kv.Key]);
                }
            }

            foreach (var kv in other._usedImages)
            {
                if (!_usedImages.TryAdd(kv.Key, kv.Value))
                {
                    _usedImages[kv.Key] = MergeTextureMeta(kv.Value, _usedImages[kv.Key]);
                }
            }
        }

        public void SetLayerOutputAttribute(int attr)
        {
            LayerOutputWritten = true;
            LayerOutputAttribute = attr;
        }

        public void SetGeometryShaderLayerInputAttribute(int attr)
        {
            HasLayerInputAttribute = true;
            GpLayerInputAttribute = attr;
        }

        public void SetLastInVertexPipeline()
        {
            LastInVertexPipeline = true;
        }

        public void SetInputUserAttributeFixedFunc(int index)
        {
            UsedInputAttributes |= 1 << index;
        }

        public void SetOutputUserAttributeFixedFunc(int index)
        {
            UsedOutputAttributes |= 1 << index;
        }

        public void SetInputUserAttribute(int index, int component)
        {
            int mask = 1 << index;

            UsedInputAttributes |= mask;
            _thisUsedInputAttributes |= mask;
            ThisInputAttributesComponents |= UInt128.One << (index * 4 + component);
        }

        public void SetInputUserAttributePerPatch(int index)
        {
            UsedInputAttributesPerPatch.Add(index);
        }

        public void SetOutputUserAttribute(int index)
        {
            UsedOutputAttributes |= 1 << index;
        }

        public void SetOutputUserAttributePerPatch(int index)
        {
            UsedOutputAttributesPerPatch.Add(index);
        }

        /// <summary>
        /// Takes the input usage of the next stage's configuration into account for this stage's outputs,
        /// assigns shared locations to per-patch attributes, and updates <see cref="LastInVertexPipeline"/>.
        /// </summary>
        /// <param name="config">Configuration of the next stage</param>
        public void MergeFromtNextStage(ShaderConfig config)
        {
            NextInputAttributesComponents = config.ThisInputAttributesComponents;
            NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch;
            NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr);
            MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch);

            if (UsedOutputAttributesPerPatch.Count != 0)
            {
                // Regular and per-patch input/output locations can't overlap,
                // so we must assign on our location using unused regular input/output locations.

                Dictionary<int, int> locationsMap = new Dictionary<int, int>();

                int freeMask = ~UsedOutputAttributes;

                foreach (int attr in UsedOutputAttributesPerPatch)
                {
                    int location = BitOperations.TrailingZeroCount(freeMask);
                    if (location == 32)
                    {
                        config.GpuAccessor.Log($"No enough free locations for patch input/output 0x{attr:X}.");
                        break;
                    }

                    locationsMap.Add(attr, location);
                    freeMask &= ~(1 << location);
                }

                // Both stages must agree on the locations, so use the same "map" for both.
                _perPatchAttributeLocations = locationsMap;
                config._perPatchAttributeLocations = locationsMap;
            }

            // We don't consider geometry shaders using the geometry shader passthrough feature
            // as being the last because when this feature is used, it can't actually modify any of the outputs,
            // so the stage that comes before it is the last one that can do modifications.
            if (config.Stage != ShaderStage.Fragment && (config.Stage != ShaderStage.Geometry || !config.GpPassthrough))
            {
                LastInVertexPipeline = false;
            }
        }

        public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch)
        {
            _nextUsedInputAttributes = mask;

            if (GpPassthrough)
            {
                PassthroughAttributes = mask & ~UsedOutputAttributes;
            }
            else
            {
                UsedOutputAttributes |= mask;
                UsedOutputAttributesPerPatch.UnionWith(perPatch);
            }
        }

        /// <summary>
        /// Gets the location assigned to a per-patch attribute, or the index itself if none was assigned.
        /// </summary>
        public int GetPerPatchAttributeLocation(int index)
        {
            if (_perPatchAttributeLocations == null || !_perPatchAttributeLocations.TryGetValue(index, out int location))
            {
                return index;
            }

            return location;
        }

        public bool IsUsedOutputAttribute(int attr)
        {
            // The check for fixed function attributes on the next stage is conservative,
            // returning false if the output is just not used by the next stage is also valid.
            if (NextUsesFixedFuncAttributes &&
                attr >= AttributeConsts.UserAttributeBase &&
                attr < AttributeConsts.UserAttributeEnd)
            {
                int index = (attr - AttributeConsts.UserAttributeBase) >> 4;
                return (_nextUsedInputAttributes & (1 << index)) != 0;
            }

            return true;
        }

        /// <summary>
        /// Finds the (<paramref name="index"/> + 1)-th lowest attribute bit not set in the next stage's
        /// input mask (outputs) or this stage's input mask (inputs).
        /// </summary>
        /// <returns>The bit index, or -1 if there are not enough unused bits</returns>
        public int GetFreeUserAttribute(bool isOutput, int index)
        {
            int useMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes;
            int bit = -1;

            while (useMask != -1)
            {
                bit = BitOperations.TrailingZeroCount(~useMask);

                if (bit == 32)
                {
                    bit = -1;
                    break;
                }
                else if (index < 1)
                {
                    break;
                }

                useMask |= 1 << bit;
                index--;
            }

            return bit;
        }

        public void SetAllInputUserAttributes()
        {
            UsedInputAttributes |= Constants.AllAttributesMask;
            ThisInputAttributesComponents |= ~UInt128.Zero >> (128 - Constants.MaxAttributes * 4);
        }

        public void SetAllOutputUserAttributes()
        {
            UsedOutputAttributes |= Constants.AllAttributesMask;
        }

        public void SetClipDistanceWritten(int index)
        {
            ClipDistancesWritten |= (byte)(1 << index);
        }

        public void SetUsedFeature(FeatureFlags flags)
        {
            UsedFeatures |= flags;
        }

        public void SetAccessibleBufferMasks(int sbMask, int ubeMask)
        {
            AccessibleStorageBuffersMask = sbMask;
            AccessibleConstantBuffersMask = ubeMask;
        }

        public void SetUsedConstantBuffer(int slot)
        {
            _usedConstantBuffers |= 1 << slot;
        }

        public void SetUsedStorageBuffer(int slot, bool write)
        {
            int mask = 1 << slot;
            _usedStorageBuffers |= mask;

            if (write)
            {
                _usedStorageBuffersWrite |= mask;
            }
        }

        /// <summary>
        /// Records a texture or image use for the given instruction and registers the handle with the GPU accessor.
        /// Image load/store/atomic instructions are tracked as images, everything else as textures.
        /// </summary>
        public void SetUsedTexture(
            Instruction inst,
            SamplerType type,
            TextureFormat format,
            TextureFlags flags,
            int cbufSlot,
            int handle)
        {
            inst &= Instruction.Mask;
            bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore || inst == Instruction.ImageAtomic;
            bool isWrite = inst == Instruction.ImageStore || inst == Instruction.ImageAtomic;
            bool accurateType = inst != Instruction.Lod && inst != Instruction.TextureSize;
            bool coherent = flags.HasFlag(TextureFlags.Coherent);

            if (isImage)
            {
                SetUsedTextureOrImage(_usedImages, cbufSlot, handle, type, format, true, isWrite, false, coherent);
            }
            else
            {
                bool intCoords = flags.HasFlag(TextureFlags.IntCoords) || inst == Instruction.TextureSize;
                SetUsedTextureOrImage(_usedTextures, cbufSlot, handle, type, TextureFormat.Unknown, intCoords, false, accurateType, coherent);
            }

            GpuAccessor.RegisterTexture(handle, cbufSlot);
        }

        private void SetUsedTextureOrImage(
            Dictionary<TextureInfo, TextureMeta> dict,
            int cbufSlot,
            int handle,
            SamplerType type,
            TextureFormat format,
            bool intCoords,
            bool write,
            bool accurateType,
            bool coherent)
        {
            var dimensions = type.GetDimensions();
            var isIndexed = type.HasFlag(SamplerType.Indexed);

            var usageFlags = TextureUsageFlags.None;

            if (intCoords)
            {
                usageFlags |= TextureUsageFlags.NeedsScaleValue;

                var canScale = Stage.SupportsRenderScale() && !isIndexed && !write && dimensions == 2;

                if (!canScale)
                {
                    // Resolution scaling cannot be applied to this texture right now.
                    // Flag so that we know to blacklist scaling on related textures when binding them.
                    usageFlags |= TextureUsageFlags.ResScaleUnsupported;
                }
            }

            if (write)
            {
                usageFlags |= TextureUsageFlags.ImageStore;
            }

            if (coherent)
            {
                usageFlags |= TextureUsageFlags.ImageCoherent;
            }

            int arraySize = isIndexed ? SamplerArraySize : 1;

            // Indexed samplers register one entry per array element, with handles 2 apart.
            for (int layer = 0; layer < arraySize; layer++)
            {
                var info = new TextureInfo(cbufSlot, handle + layer * 2, isIndexed, format);
                var meta = new TextureMeta()
                {
                    AccurateType = accurateType,
                    Type = type,
                    UsageFlags = usageFlags
                };

                if (dict.TryGetValue(info, out var existingMeta))
                {
                    dict[info] = MergeTextureMeta(meta, existingMeta);
                }
                else
                {
                    dict.Add(info, meta);
                }
            }
        }

        private static TextureMeta MergeTextureMeta(TextureMeta meta, TextureMeta existingMeta)
        {
            meta.UsageFlags |= existingMeta.UsageFlags;

            // If the texture we have has inaccurate type information, then
            // we prefer the most accurate one.
            if (existingMeta.AccurateType)
            {
                meta.AccurateType = true;
                meta.Type = existingMeta.Type;
            }

            return meta;
        }

        /// <summary>
        /// Gets the descriptors of the used constant buffers. The result is cached after the first call.
        /// </summary>
        public BufferDescriptor[] GetConstantBufferDescriptors()
        {
            if (_cachedConstantBufferDescriptors != null)
            {
                return _cachedConstantBufferDescriptors;
            }

            int usedMask = _usedConstantBuffers;

            if (UsedFeatures.HasFlag(FeatureFlags.CbIndexing))
            {
                usedMask |= (int)GpuAccessor.QueryConstantBufferUse();
            }

            return _cachedConstantBufferDescriptors = GetBufferDescriptors(
                usedMask,
                0,
                UsedFeatures.HasFlag(FeatureFlags.CbIndexing),
                out _firstConstantBufferBinding,
                GpuAccessor.QueryBindingConstantBuffer);
        }

        /// <summary>
        /// Gets the descriptors of the used storage buffers. The result is cached after the first call.
        /// </summary>
        public BufferDescriptor[] GetStorageBufferDescriptors()
        {
            if (_cachedStorageBufferDescriptors != null)
            {
                return _cachedStorageBufferDescriptors;
            }

            return _cachedStorageBufferDescriptors = GetBufferDescriptors(
                _usedStorageBuffers,
                _usedStorageBuffersWrite,
                true,
                out _firstStorageBufferBinding,
                GpuAccessor.QueryBindingStorageBuffer);
        }

        /// <summary>
        /// Creates one descriptor per bit set in <paramref name="usedMask"/>, in ascending slot order.
        /// </summary>
        /// <param name="usedMask">Mask of used slots</param>
        /// <param name="writtenMask">Mask of slots that get the write flag</param>
        /// <param name="isArray">When true, the binding callback is also invoked for unused slots below a used one</param>
        /// <param name="firstBinding">First binding returned by the callback, or 0 if it was never called</param>
        /// <param name="getBindingCallback">Callback that maps a slot to a binding</param>
        private static BufferDescriptor[] GetBufferDescriptors(
            int usedMask,
            int writtenMask,
            bool isArray,
            out int firstBinding,
            Func<int, int> getBindingCallback)
        {
            firstBinding = 0;
            bool hasFirstBinding = false;
            var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];

            int lastSlot = -1;

            for (int i = 0; i < descriptors.Length; i++)
            {
                int slot = BitOperations.TrailingZeroCount(usedMask);

                if (isArray)
                {
                    // The next array entries also consumes bindings, even if they are unused.
                    for (int j = lastSlot + 1; j < slot; j++)
                    {
                        int binding = getBindingCallback(j);

                        if (!hasFirstBinding)
                        {
                            firstBinding = binding;
                            hasFirstBinding = true;
                        }
                    }
                }

                lastSlot = slot;

                descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot);

                if (!hasFirstBinding)
                {
                    firstBinding = descriptors[i].Binding;
                    hasFirstBinding = true;
                }

                if ((writtenMask & (1 << slot)) != 0)
                {
                    descriptors[i].SetFlag(BufferUsageFlags.Write);
                }

                usedMask &= ~(1 << slot);
            }

            return descriptors;
        }

        public TextureDescriptor[] GetTextureDescriptors()
        {
            return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture);
        }

        public TextureDescriptor[] GetImageDescriptors()
        {
            return _cachedImageDescriptors ??= GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage);
        }

        /// <summary>
        /// Creates one descriptor per dictionary entry, ordered by handle and then by the indexed flag.
        /// </summary>
        /// <param name="dict">Used textures or images</param>
        /// <param name="getBindingCallback">Callback that maps a descriptor index and buffer flag to a binding</param>
        private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary<TextureInfo, TextureMeta> dict, Func<int, bool, int> getBindingCallback)
        {
            var descriptors = new TextureDescriptor[dict.Count]; 

            int i = 0;

            // Same resulting order as the previous OrderBy(Indexed).OrderBy(Handle) chain
            // (OrderBy is a stable sort), but stated as a single compound key and sorted once.
            foreach (var kv in dict.OrderBy(x => x.Key.Handle).ThenBy(x => x.Key.Indexed))
            {
                var info = kv.Key;
                var meta = kv.Value;

                bool isBuffer = (meta.Type & SamplerType.Mask) == SamplerType.TextureBuffer;
                int binding = getBindingCallback(i, isBuffer);

                descriptors[i] = new TextureDescriptor(binding, meta.Type, info.Format, info.CbufSlot, info.Handle);
                descriptors[i].SetFlag(meta.UsageFlags);
                i++;
            }

            return descriptors;
        }

        /// <summary>
        /// Finds the texture descriptor matching the slot, handle and format of a texture operation.
        /// Unlike <see cref="FindTextureDescriptorIndex"/>, the sampler type is not compared.
        /// </summary>
        /// <returns>The descriptor and its index, or (default, -1) if not found</returns>
        public (TextureDescriptor, int) FindTextureDescriptor(AstTextureOperation texOp)
        {
            TextureDescriptor[] descriptors = GetTextureDescriptors();

            for (int i = 0; i < descriptors.Length; i++)
            {
                var descriptor = descriptors[i];

                if (descriptor.CbufSlot == texOp.CbufSlot &&
                    descriptor.HandleIndex == texOp.Handle &&
                    descriptor.Format == texOp.Format)
                {
                    return (descriptor, i);
                }
            }

            return (default, -1);
        }

        private static int FindDescriptorIndex(TextureDescriptor[] array, AstTextureOperation texOp)
        {
            for (int i = 0; i < array.Length; i++)
            {
                var descriptor = array[i];

                if (descriptor.Type == texOp.Type &&
                    descriptor.CbufSlot == texOp.CbufSlot &&
                    descriptor.HandleIndex == texOp.Handle &&
                    descriptor.Format == texOp.Format)
                {
                    return i;
                }
            }

            return -1;
        }

        public int FindTextureDescriptorIndex(AstTextureOperation texOp)
        {
            return FindDescriptorIndex(GetTextureDescriptors(), texOp);
        }

        public int FindImageDescriptorIndex(AstTextureOperation texOp)
        {
            return FindDescriptorIndex(GetImageDescriptors(), texOp);
        }

        /// <summary>
        /// Creates the program info from the current descriptors and usage state.
        /// </summary>
        /// <param name="identification">Identification result to store on the program info</param>
        public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
        {
            return new ShaderProgramInfo(
                GetConstantBufferDescriptors(),
                GetStorageBufferDescriptors(),
                GetTextureDescriptors(),
                GetImageDescriptors(),
                identification,
                GpLayerInputAttribute,
                Stage,
                UsedFeatures.HasFlag(FeatureFlags.InstanceId),
                UsedFeatures.HasFlag(FeatureFlags.DrawParameters),
                UsedFeatures.HasFlag(FeatureFlags.RtLayer),
                ClipDistancesWritten,
                OmapTargets);
        }
    }
}
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs
// new file mode 100644, index 00000000..01f7f08a, @@ -0,0 +1,158 @@
using Ryujinx.Common.Utilities;
using System;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// 2-bit per-component input map value decoded from the shader header.
    /// </summary>
    enum PixelImap
    {
        Unused = 0,
        Constant = 1,
        Perspective = 2,
        ScreenLinear = 3
    }

    /// <summary>
    /// Input map values for the four components of one attribute.
    /// </summary>
    readonly struct ImapPixelType
    {
        public PixelImap X { get; }
        public PixelImap Y { get; }
        public PixelImap Z { get; }
        public PixelImap W { get; }

        public ImapPixelType(PixelImap x, PixelImap y, PixelImap z, PixelImap w)
        {
            X = x;
            Y = y;
            Z = z;
            W = w;
        }

        /// <summary>
        /// Gets the value of the first component, in X, Y, Z order, that is not unused.
        /// </summary>
        /// <returns>That value, or the W value (which may itself be unused) if X, Y and Z are all unused</returns>
        public PixelImap GetFirstUsedType()
        {
            if (X != PixelImap.Unused)
            {
                return X;
            }

            if (Y != PixelImap.Unused)
            {
                return Y;
            }

            return Z != PixelImap.Unused ? Z : W;
        }
    }

    /// <summary>
    /// Shader header fields decoded from the first 0x50 bytes of the shader code.
    /// </summary>
    class ShaderHeader
    {
        public int SphType { get; }
        public int Version { get; }

        public ShaderStage Stage { get; }

        public bool MrtEnable { get; }

        public bool KillsPixels { get; }

        public bool DoesGlobalStore { get; }

        public int SassVersion { get; }

        public bool GpPassthrough { get; }

        public bool DoesLoadOrStore { get; }
        public bool DoesFp64 { get; }

        public int StreamOutMask { get; }

        public int ShaderLocalMemoryLowSize { get; }

        public int PerPatchAttributeCount { get; }

        public int ShaderLocalMemoryHighSize { get; }

        public int ThreadsPerInputPrimitive { get; }

        public int ShaderLocalMemoryCrsSize { get; }

        public OutputTopology OutputTopology { get; }

        public int MaxOutputVertexCount { get; }

        public int StoreReqStart { get; }
        public int StoreReqEnd { get; }

        public ImapPixelType[] ImapTypes { get; }

        public int OmapTargets { get; }
        public bool OmapSampleMask { get; }
        public bool OmapDepth { get; }

        /// <summary>
        /// Reads 0x50 bytes of code at <paramref name="address"/> and decodes the header fields from them.
        /// </summary>
        /// <param name="gpuAccessor">GPU accessor used to read the code</param>
        /// <param name="address">Address passed to the accessor's GetCode</param>
        public ShaderHeader(IGpuAccessor gpuAccessor, ulong address)
        {
            ReadOnlySpan<int> words = MemoryMarshal.Cast<ulong, int>(gpuAccessor.GetCode(address, 0x50));

            // NOTE(review): Extract(a, b) is presumably (bit offset, bit count) and Extract(a) a single-bit test; confirm against BitUtils.
            int word0 = words[0];

            SphType = word0.Extract(0, 5);
            Version = word0.Extract(5, 5);

            ShaderStage stage = (ShaderStage)word0.Extract(10, 4);

            // Invalid.
            Stage = stage == ShaderStage.Compute ? ShaderStage.Vertex : stage;

            MrtEnable = word0.Extract(14);

            KillsPixels = word0.Extract(15);

            DoesGlobalStore = word0.Extract(16);

            SassVersion = word0.Extract(17, 4);

            GpPassthrough = word0.Extract(24);

            DoesLoadOrStore = word0.Extract(26);
            DoesFp64 = word0.Extract(27);

            StreamOutMask = word0.Extract(28, 4);

            int word1 = words[1];

            ShaderLocalMemoryLowSize = word1.Extract(0, 24);

            PerPatchAttributeCount = word1.Extract(24, 8);

            int word2 = words[2];

            ShaderLocalMemoryHighSize = word2.Extract(0, 24);

            ThreadsPerInputPrimitive = word2.Extract(24, 8);

            int word3 = words[3];

            ShaderLocalMemoryCrsSize = word3.Extract(0, 24);

            OutputTopology = (OutputTopology)word3.Extract(24, 4);

            int word4 = words[4];

            MaxOutputVertexCount = word4.Extract(0, 12);

            StoreReqStart = word4.Extract(12, 8);
            StoreReqEnd = word4.Extract(24, 8);

            ImapPixelType[] imapTypes = new ImapPixelType[32];

            for (int attr = 0; attr < imapTypes.Length; attr++)
            {
                // One byte per attribute, four attributes per word, starting at word 6.
                int packed = words[6 + (attr >> 2)];
                int shift = (attr & 3) * 8;
                byte imap = (byte)(packed >> shift);

                // Two bits per component, X in the lowest bits.
                PixelImap x = (PixelImap)(imap & 3);
                PixelImap y = (PixelImap)((imap >> 2) & 3);
                PixelImap z = (PixelImap)((imap >> 4) & 3);
                PixelImap w = (PixelImap)((imap >> 6) & 3);

                imapTypes[attr] = new ImapPixelType(x, y, z, w);
            }

            ImapTypes = imapTypes;

            int omapFlags = words[19];

            OmapTargets = words[18];
            OmapSampleMask = omapFlags.Extract(0);
            OmapDepth = omapFlags.Extract(1);
        }
    }
}
// Files:
//   src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
//   src/Ryujinx.Graphics.Shader/Translation/Ssa.cs
// The using directives of both files are listed once here so that this listing is a valid compilation unit.
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

// ---- ShaderIdentifier.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Recognizes shaders that match a known pattern.
    /// </summary>
    static class ShaderIdentifier
    {
        /// <summary>
        /// Classifies a translated shader.
        /// </summary>
        /// <remarks>
        /// The only pattern checked is a geometry shader with triangle input, on a host that reports
        /// no geometry shader support, that merely passes its inputs through while writing the layer.
        /// When it matches, the layer input attribute is recorded on <paramref name="config"/>.
        /// </remarks>
        /// <param name="functions">Functions of the shader</param>
        /// <param name="config">Shader configuration, queried for stage and host capabilities</param>
        /// <returns>The identification result, or <see cref="ShaderIdentification.None"/> when nothing matched</returns>
        public static ShaderIdentification Identify(Function[] functions, ShaderConfig config)
        {
            if (config.Stage == ShaderStage.Geometry &&
                config.GpuAccessor.QueryPrimitiveTopology() == InputTopology.Triangles &&
                !config.GpuAccessor.QueryHostSupportsGeometryShader() &&
                IsLayerPassthroughGeometryShader(functions, out int layerInputAttr))
            {
                config.SetGeometryShaderLayerInputAttribute(layerInputAttr);

                return ShaderIdentification.GeometryLayerPassthrough;
            }

            return ShaderIdentification.None;
        }

        /// <summary>
        /// Checks whether the shader consists of a single function without backward branches or resource writes,
        /// whose output stores are either copies of the same input attribute, the constants (0, 0, 0, 1),
        /// or a write of the layer from a user defined input, and that emits exactly three vertices.
        /// </summary>
        /// <param name="functions">Functions of the shader</param>
        /// <param name="layerInputAttr">
        /// On success, source 1 of the layer load multiplied by 4 plus its source 3
        /// (presumably location and component; confirm against the Load operand layout). Zero otherwise.
        /// </param>
        /// <returns>True if the shader matches the pattern and writes the layer</returns>
        private static bool IsLayerPassthroughGeometryShader(Function[] functions, out int layerInputAttr)
        {
            bool writesLayer = false;
            layerInputAttr = 0;

            if (functions.Length != 1)
            {
                return false;
            }

            // Vertices emitted since the last EndPrimitive, and vertices of all finished primitives.
            int verticesCount = 0;
            int totalVerticesCount = 0;

            foreach (BasicBlock block in functions[0].Blocks)
            {
                // We are not expecting loops or any complex control flow here, so fail in those cases.
                if (block.Branch != null && block.Branch.Index <= block.Index)
                {
                    return false;
                }

                foreach (INode node in block.Operations)
                {
                    if (node is not Operation operation)
                    {
                        continue;
                    }

                    if (IsResourceWrite(operation.Inst))
                    {
                        return false;
                    }

                    if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output)
                    {
                        // The stored value is always the last source.
                        Operand src = operation.GetSource(operation.SourcesCount - 1);
                        Operation srcAttributeAsgOp = null;

                        if (src.Type == OperandType.LocalVariable &&
                            src.AsgOp is Operation asgOp &&
                            asgOp.Inst == Instruction.Load &&
                            asgOp.StorageKind.IsInputOrOutput())
                        {
                            // Only values loaded from regular inputs are accepted.
                            if (asgOp.StorageKind != StorageKind.Input)
                            {
                                return false;
                            }

                            srcAttributeAsgOp = asgOp;
                        }

                        if (srcAttributeAsgOp != null)
                        {
                            IoVariable dstAttribute = (IoVariable)operation.GetSource(0).Value;
                            IoVariable srcAttribute = (IoVariable)srcAttributeAsgOp.GetSource(0).Value;

                            if (dstAttribute == IoVariable.Layer && srcAttribute == IoVariable.UserDefined)
                            {
                                if (srcAttributeAsgOp.SourcesCount != 4)
                                {
                                    return false;
                                }

                                writesLayer = true;
                                layerInputAttr = srcAttributeAsgOp.GetSource(1).Value * 4 + srcAttributeAsgOp.GetSource(3).Value;
                            }
                            else
                            {
                                if (dstAttribute != srcAttribute)
                                {
                                    return false;
                                }

                                int inputsCount = operation.SourcesCount - 2;

                                if (dstAttribute == IoVariable.UserDefined)
                                {
                                    // For user defined attributes, source 1 must match on both sides as well.
                                    if (operation.GetSource(1).Value != srcAttributeAsgOp.GetSource(1).Value)
                                    {
                                        return false;
                                    }

                                    inputsCount--;
                                }

                                // Compare the trailing index operands of the store (excluding the stored value)
                                // with the trailing operands of the load; all of them must be equal constants.
                                for (int i = 0; i < inputsCount; i++)
                                {
                                    int dstIndex = operation.SourcesCount - 2 - i;
                                    int srcIndex = srcAttributeAsgOp.SourcesCount - 1 - i;

                                    // True when either index is negative.
                                    if ((dstIndex | srcIndex) < 0)
                                    {
                                        return false;
                                    }

                                    if (operation.GetSource(dstIndex).Type != OperandType.Constant ||
                                        srcAttributeAsgOp.GetSource(srcIndex).Type != OperandType.Constant ||
                                        operation.GetSource(dstIndex).Value != srcAttributeAsgOp.GetSource(srcIndex).Value)
                                    {
                                        return false;
                                    }
                                }
                            }
                        }
                        else if (src.Type == OperandType.Constant)
                        {
                            // Constant stores are only accepted when they write 1 to component 3 and 0 elsewhere.
                            int dstComponent = operation.GetSource(operation.SourcesCount - 2).Value;
                            float expectedValue = dstComponent == 3 ? 1f : 0f;

                            if (src.AsFloat() != expectedValue)
                            {
                                return false;
                            }
                        }
                        else
                        {
                            return false;
                        }
                    }
                    else if (operation.Inst == Instruction.EmitVertex)
                    {
                        verticesCount++;
                    }
                    else if (operation.Inst == Instruction.EndPrimitive)
                    {
                        totalVerticesCount += verticesCount;
                        verticesCount = 0;
                    }
                }
            }

            return totalVerticesCount + verticesCount == 3 && writesLayer;
        }

        /// <summary>
        /// Checks if the instruction is one of the atomic, image store or global/storage store instructions.
        /// </summary>
        /// <param name="inst">Instruction to check</param>
        /// <returns>True for the listed instructions, false otherwise</returns>
        private static bool IsResourceWrite(Instruction inst)
        {
            switch (inst)
            {
                case Instruction.AtomicAdd:
                case Instruction.AtomicAnd:
                case Instruction.AtomicCompareAndSwap:
                case Instruction.AtomicMaxS32:
                case Instruction.AtomicMaxU32:
                case Instruction.AtomicMinS32:
                case Instruction.AtomicMinU32:
                case Instruction.AtomicOr:
                case Instruction.AtomicSwap:
                case Instruction.AtomicXor:
                case Instruction.ImageAtomic:
                case Instruction.ImageStore:
                case Instruction.StoreGlobal:
                case Instruction.StoreGlobal16:
                case Instruction.StoreGlobal8:
                case Instruction.StoreStorage:
                case Instruction.StoreStorage16:
                case Instruction.StoreStorage8:
                    return true;
            }

            return false;
        }
    }
}

// ---- Ssa.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Renames register operands into local variables, inserting phi nodes where
    /// definitions coming from different blocks meet.
    /// </summary>
    static class Ssa
    {
        private const int GprsAndPredsCount = RegisterConsts.GprsCount + RegisterConsts.PredsCount;

        /// <summary>
        /// Per-block record of the last definition of each register, plus a bit set of
        /// the registers that need a phi node at the start of the block.
        /// </summary>
        private class DefMap
        {
            private readonly Dictionary<Register, Operand> _map;

            // One bit per register key, 64 keys per element.
            private readonly long[] _phiMasks;

            public DefMap()
            {
                _map = new Dictionary<Register, Operand>();

                _phiMasks = new long[(RegisterConsts.TotalCount + 63) / 64];
            }

            /// <summary>
            /// Records <paramref name="operand"/> as the definition of <paramref name="reg"/>,
            /// unless one is already recorded.
            /// </summary>
            /// <returns>True if the definition was added, false if one already existed</returns>
            public bool TryAddOperand(Register reg, Operand operand)
            {
                return _map.TryAdd(reg, operand);
            }

            /// <summary>
            /// Gets the definition recorded for <paramref name="reg"/>, if any.
            /// </summary>
            public bool TryGetOperand(Register reg, out Operand operand)
            {
                return _map.TryGetValue(reg, out operand);
            }

            /// <summary>
            /// Marks <paramref name="reg"/> as needing a phi node on this block.
            /// </summary>
            /// <returns>True if the register was not marked before, false otherwise</returns>
            public bool AddPhi(Register reg)
            {
                int key = GetKeyFromRegister(reg);

                int index = key / 64;
                int bit = key & 63;

                long mask = 1L << bit;

                if ((_phiMasks[index] & mask) != 0)
                {
                    return false;
                }

                _phiMasks[index] |= mask;

                return true;
            }

            /// <summary>
            /// Checks if <paramref name="reg"/> was marked with <see cref="AddPhi"/>.
            /// </summary>
            public bool HasPhi(Register reg)
            {
                int key = GetKeyFromRegister(reg);

                int index = key / 64;
                int bit = key & 63;

                return (_phiMasks[index] & (1L << bit)) != 0;
            }
        }

        /// <summary>
        /// Register key to operand map used while processing a single block. It remembers which
        /// keys were set so that it can be enumerated and cleared without scanning the whole array.
        /// </summary>
        private class LocalDefMap
        {
            private readonly Operand[] _map;
            private readonly int[] _uses;
            public int UseCount { get; private set; }

            public LocalDefMap()
            {
                _map = new Operand[RegisterConsts.TotalCount];
                _uses = new int[RegisterConsts.TotalCount];
            }

            /// <summary>
            /// Gets the operand stored for the key, or null if there is none.
            /// </summary>
            public Operand Get(int key)
            {
                return _map[key];
            }

            /// <summary>
            /// Stores (or replaces) the operand for the key.
            /// </summary>
            public void Add(int key, Operand operand)
            {
                // Only the first write to a key adds it to the list of used keys.
                if (_map[key] == null)
                {
                    _uses[UseCount++] = key;
                }

                _map[key] = operand;
            }

            /// <summary>
            /// Gets the key that was set at position <paramref name="index"/> (in first-set order)
            /// together with the operand currently stored for it.
            /// </summary>
            public Operand GetUse(int index, out int key)
            {
                key = _uses[index];

                return _map[key];
            }

            /// <summary>
            /// Removes all entries that were set since the last clear.
            /// </summary>
            public void Clear()
            {
                for (int i = 0; i < UseCount; i++)
                {
                    _map[_uses[i]] = null;
                }

                UseCount = 0;
            }
        }

        /// <summary>
        /// A definition together with the block where it was found.
        /// </summary>
        private readonly struct Definition
        {
            public BasicBlock Block { get; }
            public Operand Local { get; }

            public Definition(BasicBlock block, Operand local)
            {
                Block = block;
                Local = local;
            }
        }

        /// <summary>
        /// Replaces the register operands of all operations on the blocks with local variables.
        /// </summary>
        /// <remarks>
        /// Uses <c>Index</c>, <c>DominanceFrontiers</c>, <c>ImmediateDominator</c> and <c>Predecessors</c> of the
        /// blocks, so those must have been computed before calling this method.
        /// </remarks>
        /// <param name="blocks">Blocks of the function, indexable by <c>BasicBlock.Index</c></param>
        public static void Rename(BasicBlock[] blocks)
        {
            DefMap[] globalDefs = new DefMap[blocks.Length];
            LocalDefMap localDefs = new LocalDefMap();

            for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
            {
                globalDefs[blkIndex] = new DefMap();
            }

            Queue<BasicBlock> dfPhiBlocks = new Queue<BasicBlock>();

            // First pass, get all defs and locals uses.
            for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
            {
                // Replaces a register with the local that defines it on this block, if there is one already.
                Operand RenameLocal(Operand operand)
                {
                    if (operand != null && operand.Type == OperandType.Register)
                    {
                        Operand local = localDefs.Get(GetKeyFromRegister(operand.GetRegister()));

                        operand = local ?? operand;
                    }

                    return operand;
                }

                BasicBlock block = blocks[blkIndex];

                LinkedListNode<INode> node = block.Operations.First;

                while (node != null)
                {
                    if (node.Value is Operation operation)
                    {
                        // Sources are renamed before the destinations are redefined,
                        // so an operation reading and writing the same register reads the old value.
                        for (int index = 0; index < operation.SourcesCount; index++)
                        {
                            operation.SetSource(index, RenameLocal(operation.GetSource(index)));
                        }

                        for (int index = 0; index < operation.DestsCount; index++)
                        {
                            Operand dest = operation.GetDest(index);

                            if (dest != null && dest.Type == OperandType.Register)
                            {
                                Operand local = Local();

                                localDefs.Add(GetKeyFromRegister(dest.GetRegister()), local);

                                operation.SetDest(index, local);
                            }
                        }
                    }

                    node = node.Next;
                }

                // Publish the last definition of each register written on this block, and mark
                // all blocks on the (iterated) dominance frontier as needing a phi for it.
                int localUses = localDefs.UseCount;
                for (int index = 0; index < localUses; index++)
                {
                    Operand local = localDefs.GetUse(index, out int key);

                    Register reg = GetRegisterFromKey(key);

                    globalDefs[block.Index].TryAddOperand(reg, local);

                    dfPhiBlocks.Enqueue(block);

                    while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock))
                    {
                        foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers)
                        {
                            // AddPhi returns false for already marked blocks, which stops the propagation.
                            if (globalDefs[domFrontier.Index].AddPhi(reg))
                            {
                                dfPhiBlocks.Enqueue(domFrontier);
                            }
                        }
                    }
                }

                localDefs.Clear();
            }

            // Second pass, rename variables with definitions on different blocks.
            for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
            {
                BasicBlock block = blocks[blkIndex];

                // Resolves a register that is still unrenamed after the first pass. The result is
                // cached on localDefs so that each register is only looked up once per block.
                Operand RenameGlobal(Operand operand)
                {
                    if (operand != null && operand.Type == OperandType.Register)
                    {
                        int key = GetKeyFromRegister(operand.GetRegister());

                        Operand local = localDefs.Get(key);

                        if (local != null)
                        {
                            return local;
                        }

                        operand = FindDefinitionForCurr(globalDefs, block, operand.GetRegister());

                        localDefs.Add(key, operand);
                    }

                    return operand;
                }

                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
                {
                    if (node.Value is Operation operation)
                    {
                        for (int index = 0; index < operation.SourcesCount; index++)
                        {
                            operation.SetSource(index, RenameGlobal(operation.GetSource(index)));
                        }
                    }
                }

                // The map is not used after the last block, so clearing it there is skipped.
                if (blkIndex < blocks.Length - 1)
                {
                    localDefs.Clear();
                }
            }
        }

        /// <summary>
        /// Finds the definition of a register that is used on <paramref name="current"/> before
        /// being defined there: the phi of the block if it has one, otherwise the closest
        /// definition on its dominators, otherwise an undefined value.
        /// </summary>
        private static Operand FindDefinitionForCurr(DefMap[] globalDefs, BasicBlock current, Register reg)
        {
            if (globalDefs[current.Index].HasPhi(reg))
            {
                return InsertPhi(globalDefs, current, reg);
            }

            // A block that is its own immediate dominator has no dominator to search.
            if (current != current.ImmediateDominator)
            {
                return FindDefinition(globalDefs, current.ImmediateDominator, reg).Local;
            }

            return Undef();
        }

        /// <summary>
        /// Finds the definition of a register that is visible at the end of <paramref name="current"/>,
        /// searching the block itself and then its chain of immediate dominators.
        /// </summary>
        /// <returns>The definition found, or an undefined value paired with <paramref name="current"/></returns>
        private static Definition FindDefinition(DefMap[] globalDefs, BasicBlock current, Register reg)
        {
            foreach (BasicBlock block in SelfAndImmediateDominators(current))
            {
                DefMap defMap = globalDefs[block.Index];

                if (defMap.TryGetOperand(reg, out Operand lastDef))
                {
                    return new Definition(block, lastDef);
                }

                if (defMap.HasPhi(reg))
                {
                    return new Definition(block, InsertPhi(globalDefs, block, reg));
                }
            }

            return new Definition(current, Undef());
        }

        /// <summary>
        /// Enumerates the block followed by its immediate dominators, ending at the block
        /// that is its own immediate dominator.
        /// </summary>
        private static IEnumerable<BasicBlock> SelfAndImmediateDominators(BasicBlock block)
        {
            while (block != block.ImmediateDominator)
            {
                yield return block;

                block = block.ImmediateDominator;
            }

            yield return block;
        }

        /// <summary>
        /// Creates the phi node for a register on a block and returns the local it defines.
        /// </summary>
        private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Register reg)
        {
            // This block has a Phi that has not been materialized yet, but that
            // would define a new version of the variable we're looking for. We need
            // to materialize the Phi, add all the block/operand pairs into the Phi, and
            // then use the definition from that Phi.
            Operand local = Local();

            PhiNode phi = new PhiNode(local);

            AddPhi(block, phi);

            // The definition is registered before visiting the predecessors, so a lookup that
            // comes back to this block finds it instead of inserting the phi again.
            globalDefs[block.Index].TryAddOperand(reg, local);

            foreach (BasicBlock predecessor in block.Predecessors)
            {
                Definition def = FindDefinition(globalDefs, predecessor, reg);

                phi.AddSource(def.Block, def.Local);
            }

            return local;
        }

        /// <summary>
        /// Inserts the phi node after the phi nodes already at the start of the block,
        /// or as the first node if the block does not start with one.
        /// </summary>
        private static void AddPhi(BasicBlock block, PhiNode phi)
        {
            LinkedListNode<INode> node = block.Operations.First;

            if (node != null)
            {
                while (node.Next?.Value is PhiNode)
                {
                    node = node.Next;
                }
            }

            if (node?.Value is PhiNode)
            {
                block.Operations.AddAfter(node, phi);
            }
            else
            {
                block.Operations.AddFirst(phi);
            }
        }

        /// <summary>
        /// Maps a register to a unique index: general purpose registers first,
        /// then predicates, then every other register type.
        /// </summary>
        private static int GetKeyFromRegister(Register reg)
        {
            if (reg.Type == RegisterType.Gpr)
            {
                return reg.Index;
            }
            else if (reg.Type == RegisterType.Predicate)
            {
                return RegisterConsts.GprsCount + reg.Index;
            }
            else /* if (reg.Type == RegisterType.Flag) */
            {
                return GprsAndPredsCount + reg.Index;
            }
        }

        /// <summary>
        /// Inverse of <see cref="GetKeyFromRegister"/>.
        /// </summary>
        private static Register GetRegisterFromKey(int key)
        {
            if (key < RegisterConsts.GprsCount)
            {
                return new Register(key, RegisterType.Gpr);
            }
            else if (key < GprsAndPredsCount)
            {
                return new Register(key - RegisterConsts.GprsCount, RegisterType.Predicate);
            }
            else /* if (key < RegisterConsts.TotalCount) */
            {
                return new Register(key - GprsAndPredsCount, RegisterType.Flag);
            }
        }
    }
}
// Files:
//   src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs
//   src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs
//   src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs
// The using directive of TranslationFlags.cs is listed first so that this listing is a valid compilation unit.
using System;

// ---- TargetApi.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// API that the translated shader is meant for.
    /// </summary>
    public enum TargetApi
    {
        OpenGL = 0,
        Vulkan = 1
    }
}

// ---- TargetLanguage.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Language of the code produced by the translator.
    /// </summary>
    public enum TargetLanguage
    {
        Glsl = 0,
        Spirv = 1,
        Arb = 2
    }
}

// ---- TranslationFlags.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Bit flags that modify how a shader is translated.
    /// </summary>
    [Flags]
    public enum TranslationFlags
    {
        None = 0x0,

        VertexA = 0x1,
        Compute = 0x2,
        DebugMode = 0x4
    }
}
// ---- File: src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Immutable set of options passed to the translator: output language, target API and flags.
    /// </summary>
    public readonly struct TranslationOptions
    {
        public TargetLanguage TargetLanguage { get; }
        public TargetApi TargetApi { get; }
        public TranslationFlags Flags { get; }

        /// <summary>
        /// Creates a new set of translation options.
        /// </summary>
        /// <param name="targetLanguage">Language of the generated code</param>
        /// <param name="targetApi">API the generated code is meant for</param>
        /// <param name="flags">Translation flags</param>
        public TranslationOptions(TargetLanguage targetLanguage, TargetApi targetApi, TranslationFlags flags)
        {
            TargetLanguage = targetLanguage;
            TargetApi = targetApi;
            Flags = flags;
        }
    }
}

// ---- File: src/Ryujinx.Graphics.Shader/Translation/Translator.cs ----
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System;
using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Entry points for decoding a shader and translating it into GLSL or SPIR-V.
    /// </summary>
    public static class Translator
    {
        // Size in bytes of the header that precedes the code of non-compute shaders.
        private const int HeaderSize = 0x50;

        /// <summary>
        /// Operations emitted for a single function.
        /// </summary>
        internal readonly struct FunctionCode
        {
            public Operation[] Code { get; }

            public FunctionCode(Operation[] code)
            {
                Code = code;
            }
        }

        /// <summary>
        /// Decodes the shader at the given address and returns a context that can be used to translate it.
        /// </summary>
        /// <param name="address">Address of the shader (of its header, for non-compute shaders)</param>
        /// <param name="gpuAccessor">Accessor used to read the shader and query state</param>
        /// <param name="options">Translation options</param>
        /// <returns>Context holding the decoded program and its configuration</returns>
        public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
        {
            return DecodeShader(address, gpuAccessor, options);
        }

        /// <summary>
        /// Runs the translation passes over the emitted operations and generates the final shader program.
        /// </summary>
        /// <param name="functions">Emitted code, one entry per function; index 0 is handled as the entry point</param>
        /// <param name="config">Shader configuration</param>
        /// <returns>Generated program</returns>
        /// <exception cref="NotImplementedException">Thrown when the target language is neither GLSL nor SPIR-V</exception>
        internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config)
        {
            var cfgs = new ControlFlowGraph[functions.Length];
            var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length];

            // Register usage is only computed for functions other than the first one;
            // the results for all of them are needed before any call can be fixed up below.
            for (int i = 0; i < functions.Length; i++)
            {
                cfgs[i] = ControlFlowGraph.Create(functions[i].Code);

                if (i != 0)
                {
                    frus[i] = RegisterUsage.RunPass(cfgs[i]);
                }
            }

            Function[] funcs = new Function[functions.Length];

            for (int i = 0; i < functions.Length; i++)
            {
                var cfg = cfgs[i];

                int inArgumentsCount = 0;
                int outArgumentsCount = 0;

                if (i != 0)
                {
                    var fru = frus[i];

                    inArgumentsCount = fru.InArguments.Length;
                    outArgumentsCount = fru.OutArguments.Length;
                }

                // Functions without any block skip all passes.
                if (cfg.Blocks.Length != 0)
                {
                    RegisterUsage.FixupCalls(cfg.Blocks, frus);

                    // Dominators and dominance frontiers are computed right before the SSA renaming.
                    Dominance.FindDominators(cfg);
                    Dominance.FindDominanceFrontiers(cfg.Blocks);

                    Ssa.Rename(cfg.Blocks);

                    Optimizer.RunPass(cfg.Blocks, config);
                    Rewriter.RunPass(cfg.Blocks, config);
                }

                funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount);
            }

            var identification = ShaderIdentifier.Identify(funcs, config);

            var sInfo = StructuredProgram.MakeStructuredProgram(funcs, config);

            var info = config.CreateProgramInfo(identification);

            return config.Options.TargetLanguage switch
            {
                TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)),
                TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)),
                _ => throw new NotImplementedException(config.Options.TargetLanguage.ToString())
            };
        }

        /// <summary>
        /// Decodes the shader and creates its configuration.
        /// </summary>
        /// <param name="address">Address of the shader (of its header, for non-compute shaders)</param>
        /// <param name="gpuAccessor">Accessor used to read the shader and query state</param>
        /// <param name="options">Translation options</param>
        /// <returns>Context holding the decoded program and its configuration</returns>
        private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
        {
            ShaderConfig config;
            DecodedProgram program;
            ulong maxEndAddress = 0;

            if (options.Flags.HasFlag(TranslationFlags.Compute))
            {
                // With the Compute flag no header is read, and the code starts at the address itself.
                config = new ShaderConfig(gpuAccessor, options);

                program = Decoder.Decode(config, address);
            }
            else
            {
                // Otherwise the header is read from the address, and the code starts right after it.
                config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options);

                program = Decoder.Decode(config, address + HeaderSize);
            }

            // Find the highest block end address across all decoded functions.
            foreach (DecodedFunction function in program)
            {
                foreach (Block block in function.Blocks)
                {
                    if (maxEndAddress < block.EndAddress)
                    {
                        maxEndAddress = block.EndAddress;
                    }
                }
            }

            // The header size is added on top of the highest end address when a header is present.
            // NOTE(review): this treats EndAddress as an offset from the code start; confirm against Decoder.
            config.SizeAdd((int)maxEndAddress + (options.Flags.HasFlag(TranslationFlags.Compute) ? 0 : HeaderSize));

            return new TranslatorContext(address, program, config);
        }

        /// <summary>
        /// Emits the operations for all functions of a decoded program.
        /// </summary>
        /// <param name="program">Decoded program</param>
        /// <param name="config">Shader configuration</param>
        /// <param name="initializeOutputs">True to emit output initialization code at the start of the first function</param>
        /// <param name="initializationOperations">Number of operations emitted for output initialization, zero if none</param>
        /// <returns>Emitted code, one entry per function id</returns>
        internal static FunctionCode[] EmitShader(DecodedProgram program, ShaderConfig config, bool initializeOutputs, out int initializationOperations)
        {
            initializationOperations = 0;

            FunctionMatch.RunPass(program);

            // Ids are assigned in address order, skipping functions flagged as compiler generated.
            foreach (DecodedFunction function in program.OrderBy(x => x.Address).Where(x => !x.IsCompilerGenerated))
            {
                program.AddFunctionAndSetId(function);
            }

            FunctionCode[] functions = new FunctionCode[program.FunctionsWithIdCount];

            for (int index = 0; index < functions.Length; index++)
            {
                EmitterContext context = new EmitterContext(program, config, index != 0);

                if (initializeOutputs && index == 0)
                {
                    EmitOutputsInitialization(context, config);
                    initializationOperations = context.OperationsCount;
                }

                DecodedFunction function = program.GetFunctionById(index);

                foreach (Block block in function.Blocks)
                {
                    context.CurrBlock = block;

                    context.EnterBlock(block.Address);

                    EmitOps(context, block);
                }

                functions[index] = new FunctionCode(context.GetOperations());
            }

            return functions;
        }

        /// <summary>
        /// Emits stores that give the outputs read by the next stage a defined initial value.
        /// </summary>
        /// <param name="context">Emitter context that receives the stores</param>
        /// <param name="config">Shader configuration</param>
        private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config)
        {
            // Compute has no output attributes, and fragment is the last stage, so we
            // don't need to initialize outputs on those stages.
            if (config.Stage == ShaderStage.Compute || config.Stage == ShaderStage.Fragment)
            {
                return;
            }

            if (config.Stage == ShaderStage.Vertex)
            {
                InitializePositionOutput(context);
            }

            // Each set bit is one attribute component: bit index / 4 is the attribute, bit index & 3 the component.
            UInt128 usedAttributes = context.Config.NextInputAttributesComponents;
            while (usedAttributes != UInt128.Zero)
            {
                int index = (int)UInt128.TrailingZeroCount(usedAttributes);
                int vecIndex = index / 4;

                // Clear the bit that was just found.
                usedAttributes &= ~(UInt128.One << index);

                // We don't need to initialize passthrough attributes.
                if ((context.Config.PassthroughAttributes & (1 << vecIndex)) != 0)
                {
                    continue;
                }

                InitializeOutputComponent(context, vecIndex, index & 3, perPatch: false);
            }

            if (context.Config.NextUsedInputAttributesPerPatch != null)
            {
                foreach (int vecIndex in context.Config.NextUsedInputAttributesPerPatch.Order())
                {
                    InitializeOutput(context, vecIndex, perPatch: true);
                }
            }

            if (config.NextUsesFixedFuncAttributes)
            {
                // The range starts one index later when the host can't write the layer from vertex/tessellation.
                bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
                int fixedStartAttr = supportsLayerFromVertexOrTess ? 0 : 1;

                for (int i = fixedStartAttr; i < fixedStartAttr + 5 + AttributeConsts.TexCoordCount; i++)
                {
                    int index = config.GetFreeUserAttribute(isOutput: true, i);
                    if (index < 0)
                    {
                        // A negative result ends the loop; no further indices are tried.
                        break;
                    }

                    InitializeOutput(context, index, perPatch: false);

                    config.SetOutputUserAttributeFixedFunc(index);
                }
            }
        }

        /// <summary>
        /// Emits stores that set the position output to (0, 0, 0, 1).
        /// </summary>
        private static void InitializePositionOutput(EmitterContext context)
        {
            for (int c = 0; c < 4; c++)
            {
                context.Store(StorageKind.Output, IoVariable.Position, null, Const(c), ConstF(c == 3 ? 1f : 0f));
            }
        }

        /// <summary>
        /// Emits stores that set all four components of a user defined output to (0, 0, 0, 1).
        /// </summary>
        private static void InitializeOutput(EmitterContext context, int location, bool perPatch)
        {
            for (int c = 0; c < 4; c++)
            {
                InitializeOutputComponent(context, location, c, perPatch);
            }
        }

        /// <summary>
        /// Emits a store that sets one component of a user defined output to 1 (component 3) or 0 (any other).
        /// </summary>
        /// <param name="context">Emitter context that receives the store</param>
        /// <param name="location">Location of the user defined output</param>
        /// <param name="c">Component index</param>
        /// <param name="perPatch">True to store to the per-patch outputs</param>
        private static void InitializeOutputComponent(EmitterContext context, int location, int c, bool perPatch)
        {
            StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;

            if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing))
            {
                // With the OaIndexing feature, a single flat index (location * 4 + component) is used.
                Operand invocationId = null;

                if (context.Config.Stage == ShaderStage.TessellationControl && !perPatch)
                {
                    invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
                }

                int index = location * 4 + c;

                context.Store(storageKind, IoVariable.UserDefined, invocationId, Const(index), ConstF(c == 3 ? 1f : 0f));
            }
            else
            {
                if (context.Config.Stage == ShaderStage.TessellationControl && !perPatch)
                {
                    // Here the location is passed first and the invocation id second.
                    Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
                    context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(c == 3 ? 1f : 0f));
                }
                else
                {
                    context.Store(storageKind, IoVariable.UserDefined, null, Const(location), Const(c), ConstF(c == 3 ? 1f : 0f));
                }
            }
        }

        /// <summary>
        /// Emits the operations for all instructions of a block, wrapping predicated
        /// instructions in a conditional branch that skips them.
        /// </summary>
        /// <param name="context">Emitter context that receives the operations</param>
        /// <param name="block">Decoded block</param>
        private static void EmitOps(EmitterContext context, Block block)
        {
            for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
            {
                InstOp op = block.OpCodes[opIndex];

                if (context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode))
                {
                    // In debug mode, a comment with the address, raw opcode and name precedes each instruction.
                    string instName;

                    if (op.Emitter != null)
                    {
                        instName = op.Name.ToString();
                    }
                    else
                    {
                        instName = "???";

                        context.Config.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
                    }

                    string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";

                    context.Add(new CommentNode(dbgComment));
                }

                InstConditional opConditional = new InstConditional(op.RawOpCode);

                // An inverted "always true" predicate is never true, so the instruction is skipped entirely.
                bool noPred = op.Props.HasFlag(InstProps.NoPred);
                if (!noPred && opConditional.Pred == RegisterConsts.PredicateTrueIndex && opConditional.PredInv)
                {
                    continue;
                }

                Operand predSkipLbl = null;

                if (Decoder.IsPopBranch(op.Name))
                {
                    // If the instruction is a SYNC or BRK instruction with only one
                    // possible target address, then the instruction is basically
                    // just a simple branch, we can generate code similar to branch
                    // instructions, with the condition check on the branch itself.
                    noPred = block.SyncTargets.Count <= 1;
                }
                else if (op.Name == InstName.Bra)
                {
                    noPred = true;
                }

                if (!(opConditional.Pred == RegisterConsts.PredicateTrueIndex || noPred))
                {
                    Operand label;

                    if (opIndex == block.OpCodes.Count - 1 && block.HasNext())
                    {
                        // For the last instruction of a block that has a next block, the label of
                        // the first successor is the skip target, so no new label is needed.
                        label = context.GetLabel(block.Successors[0].Address);
                    }
                    else
                    {
                        label = Label();

                        predSkipLbl = label;
                    }

                    Operand pred = Register(opConditional.Pred, RegisterType.Predicate);

                    // Branch over the instruction when the predicate condition does not hold.
                    if (opConditional.PredInv)
                    {
                        context.BranchIfTrue(label, pred);
                    }
                    else
                    {
                        context.BranchIfFalse(label, pred);
                    }
                }

                context.CurrOp = op;

                // Instructions without an emitter produce no operations.
                op.Emitter?.Invoke(context);

                if (predSkipLbl != null)
                {
                    context.MarkLabel(predSkipLbl);
                }
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.Translator;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// A decoded shader together with its configuration, ready to be translated.
    /// </summary>
    public class TranslatorContext
    {
        private readonly DecodedProgram _program;
        private ShaderConfig _config;

        /// <summary>
        /// Address that was passed in when the context was created.
        /// </summary>
        public ulong Address { get; }

        public ShaderStage Stage => _config.Stage;
        public int Size => _config.Size;
        public int Cb1DataSize => _config.Cb1DataSize;
        public bool LayerOutputWritten => _config.LayerOutputWritten;

        public IGpuAccessor GpuAccessor => _config.GpuAccessor;

        internal TranslatorContext(ulong address, DecodedProgram program, ShaderConfig config)
        {
            Address = address;
            _program = program;
            _config = config;
        }

        /// <summary>
        /// Checks if the operation is a load whose first source is the user defined I/O variable.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the storage kind of the operation is not checked here.
        /// </remarks>
        private static bool IsLoadUserDefined(Operation operation)
        {
            // TODO: Check if sources count match and all sources are constant.
            return operation.Inst == Instruction.Load && (IoVariable)operation.GetSource(0).Value == IoVariable.UserDefined;
        }

        /// <summary>
        /// Checks if the operation is a store whose first source is the user defined I/O variable.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the storage kind of the operation is not checked here.
        /// </remarks>
        private static bool IsStoreUserDefined(Operation operation)
        {
            // TODO: Check if sources count match and all sources are constant.
            return operation.Inst == Instruction.Store && (IoVariable)operation.GetSource(0).Value == IoVariable.UserDefined;
        }

        /// <summary>
        /// Merges two shaders into one, with the code of <paramref name="a"/> running before the code of <paramref name="b"/>.
        /// </summary>
        /// <param name="a">Functions of the first shader; must have at least one entry</param>
        /// <param name="b">Functions of the second shader; must have at least one entry</param>
        /// <param name="aStart">Index of the first operation of <paramref name="a"/>'s entry point to keep</param>
        /// <returns>
        /// The merged entry point at index 0, followed by the remaining functions of
        /// <paramref name="a"/> and then the remaining functions of <paramref name="b"/>
        /// </returns>
        private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b, int aStart)
        {
            // Here we combine two shaders.
            // For shader A:
            // - All user attribute stores on shader A are turned into copies to a
            // temporary variable. It's assumed that shader B will consume them.
            // - All return instructions are turned into branch instructions, the
            // branch target being the start of the shader B code.
            // For shader B:
            // - All user attribute loads on shader B are turned into copies from a
            // temporary variable, as long as that attribute is written by shader A.
            FunctionCode[] output = new FunctionCode[a.Length + b.Length - 1];

            Operation[] codeA = a[0].Code;
            Operation[] codeB = b[0].Code;

            // The merged entry point holds the kept operations of A, one label and all operations of B.
            List<Operation> ops = new List<Operation>(Math.Max(0, codeA.Length - aStart) + codeB.Length + 1);

            // One temporary per user attribute component, created on the first store of shader A.
            Operand[] temps = new Operand[AttributeConsts.UserAttributesCount * 4];

            Operand lblB = Label();

            for (int index = aStart; index < codeA.Length; index++)
            {
                Operation operation = codeA[index];

                if (IsStoreUserDefined(operation))
                {
                    int tIndex = operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;

                    Operand temp = temps[tIndex] ??= Local();

                    // The stored value is the last source.
                    operation.Dest = temp;
                    operation.TurnIntoCopy(operation.GetSource(operation.SourcesCount - 1));
                }

                if (operation.Inst == Instruction.Return)
                {
                    ops.Add(new Operation(Instruction.Branch, lblB));
                }
                else
                {
                    ops.Add(operation);
                }
            }

            ops.Add(new Operation(Instruction.MarkLabel, lblB));

            for (int index = 0; index < codeB.Length; index++)
            {
                Operation operation = codeB[index];

                if (IsLoadUserDefined(operation))
                {
                    int tIndex = operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;

                    Operand temp = temps[tIndex];

                    // Loads of components that shader A never wrote are left untouched.
                    if (temp != null)
                    {
                        operation.TurnIntoCopy(temp);
                    }
                }

                ops.Add(operation);
            }

            output[0] = new FunctionCode(ops.ToArray());

            for (int i = 1; i < a.Length; i++)
            {
                output[i] = a[i];
            }

            for (int i = 1; i < b.Length; i++)
            {
                output[a.Length + i - 1] = b[i];
            }

            return output;
        }

        /// <summary>
        /// Merges information from the configuration of the next stage into this one.
        /// </summary>
        /// <param name="nextStage">Context of the next shader stage</param>
        public void SetNextStage(TranslatorContext nextStage)
        {
            _config.MergeFromtNextStage(nextStage._config);
        }

        /// <summary>
        /// Forwards the geometry shader layer input attribute to the configuration.
        /// </summary>
        /// <param name="attr">Attribute value to set</param>
        public void SetGeometryShaderLayerInputAttribute(int attr)
        {
            _config.SetGeometryShaderLayerInputAttribute(attr);
        }

        /// <summary>
        /// Flags the configuration as the last one in the vertex pipeline.
        /// </summary>
        public void SetLastInVertexPipeline()
        {
            _config.SetLastInVertexPipeline();
        }

        /// <summary>
        /// Translates the shader, optionally merging it with another shader that runs before it.
        /// </summary>
        /// <param name="other">Optional shader whose code is placed before the code of this one</param>
        /// <returns>Translated program</returns>
        public ShaderProgram Translate(TranslatorContext other = null)
        {
            // When merging, the outputs are initialized by the other shader, as its code runs first.
            FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: other == null, out _);

            if (other != null)
            {
                other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes, Enumerable.Empty<int>());

                FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int aStart);

                code = Combine(otherCode, code, aStart);

                _config.InheritFrom(other._config);
            }

            return Translator.Translate(code, _config);
        }

        /// <summary>
        /// Generates a geometry shader that copies the position and the used user defined outputs of this
        /// shader from its inputs to its outputs, writing the layer output attribute to the layer instead.
        /// </summary>
        /// <returns>Generated geometry shader program</returns>
        /// <exception cref="NotImplementedException">Thrown when the target language is neither GLSL nor SPIR-V</exception>
        public ShaderProgram GenerateGeometryPassthrough()
        {
            int outputAttributesMask = _config.UsedOutputAttributes;
            int layerOutputAttr = _config.LayerOutputAttribute;

            OutputTopology outputTopology;
            int maxOutputVertices;

            switch (GpuAccessor.QueryPrimitiveTopology())
            {
                case InputTopology.Points:
                    outputTopology = OutputTopology.PointList;
                    maxOutputVertices = 1;
                    break;
                case InputTopology.Lines:
                case InputTopology.LinesAdjacency:
                    outputTopology = OutputTopology.LineStrip;
                    maxOutputVertices = 2;
                    break;
                default:
                    outputTopology = OutputTopology.TriangleStrip;
                    maxOutputVertices = 3;
                    break;
            }

            ShaderConfig config = new ShaderConfig(ShaderStage.Geometry, outputTopology, maxOutputVertices, GpuAccessor, _config.Options);

            EmitterContext context = new EmitterContext(default, config, false);

            for (int v = 0; v < maxOutputVertices; v++)
            {
                // Visit every set bit of the mask; each one is a used user defined output.
                int outAttrsMask = outputAttributesMask;

                while (outAttrsMask != 0)
                {
                    int attrIndex = BitOperations.TrailingZeroCount(outAttrsMask);

                    outAttrsMask &= ~(1 << attrIndex);

                    for (int c = 0; c < 4; c++)
                    {
                        // Value in the same form as the layer output attribute, used only for the comparison below.
                        int attr = AttributeConsts.UserAttributeBase + attrIndex * 16 + c * 4;

                        Operand value = context.Load(StorageKind.Input, IoVariable.UserDefined, Const(attrIndex), Const(v), Const(c));

                        if (attr == layerOutputAttr)
                        {
                            context.Store(StorageKind.Output, IoVariable.Layer, null, value);
                        }
                        else
                        {
                            context.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(attrIndex), Const(c), value);
                            config.SetOutputUserAttribute(attrIndex);
                        }

                        config.SetInputUserAttribute(attrIndex, c);
                    }
                }

                for (int c = 0; c < 4; c++)
                {
                    Operand value = context.Load(StorageKind.Input, IoVariable.Position, Const(v), Const(c));

                    context.Store(StorageKind.Output, IoVariable.Position, null, Const(c), value);
                }

                context.EmitVertex();
            }

            context.EndPrimitive();

            var operations = context.GetOperations();
            var cfg = ControlFlowGraph.Create(operations);
            var function = new Function(cfg.Blocks, "main", false, 0, 0);

            var sInfo = StructuredProgram.MakeStructuredProgram(new[] { function }, config);

            var info = config.CreateProgramInfo();

            return config.Options.TargetLanguage switch
            {
                TargetLanguage.Glsl => new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config)),
                TargetLanguage.Spirv => new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config)),
                _ => throw new NotImplementedException(config.Options.TargetLanguage.ToString())
            };
        }
    }
}
