diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2019-04-17 20:57:08 -0300 |
|---|---|---|
| committer | jduncanator <1518948+jduncanator@users.noreply.github.com> | 2019-04-18 09:57:08 +1000 |
| commit | 6b23a2c125b9c48b5ebea92716004ef68698bb0f (patch) | |
| tree | 69332df6fbbd8e2bddc522ba682fcc5c7a69e101 /Ryujinx.Graphics/Shader/Instructions/InstEmitFArith.cs | |
| parent | b2e88b04a85b41cc60af3485d88c90429e84a218 (diff) | |
New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
Diffstat (limited to 'Ryujinx.Graphics/Shader/Instructions/InstEmitFArith.cs')
| -rw-r--r-- | Ryujinx.Graphics/Shader/Instructions/InstEmitFArith.cs | 369 |
1 files changed, 369 insertions, 0 deletions
diff --git a/Ryujinx.Graphics/Shader/Instructions/InstEmitFArith.cs b/Ryujinx.Graphics/Shader/Instructions/InstEmitFArith.cs new file mode 100644 index 00000000..72492470 --- /dev/null +++ b/Ryujinx.Graphics/Shader/Instructions/InstEmitFArith.cs @@ -0,0 +1,369 @@ +using Ryujinx.Graphics.Shader.Decoders; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation; +using System; + +using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper; +using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper; +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Instructions +{ + static partial class InstEmit + { + public static void Fadd(EmitterContext context) + { + IOpCodeFArith op = (IOpCodeFArith)context.CurrOp; + + bool absoluteA = op.AbsoluteA, absoluteB, negateA, negateB; + + if (op is OpCodeFArithImm32) + { + negateB = op.RawOpCode.Extract(53); + negateA = op.RawOpCode.Extract(56); + absoluteB = op.RawOpCode.Extract(57); + } + else + { + negateB = op.RawOpCode.Extract(45); + negateA = op.RawOpCode.Extract(48); + absoluteB = op.RawOpCode.Extract(49); + } + + Operand srcA = context.FPAbsNeg(GetSrcA(context), absoluteA, negateA); + Operand srcB = context.FPAbsNeg(GetSrcB(context), absoluteB, negateB); + + Operand dest = GetDest(context); + + context.Copy(dest, context.FPSaturate(context.FPAdd(srcA, srcB), op.Saturate)); + + SetFPZnFlags(context, dest, op.SetCondCode); + } + + public static void Ffma(EmitterContext context) + { + IOpCodeFArith op = (IOpCodeFArith)context.CurrOp; + + bool negateB = op.RawOpCode.Extract(48); + bool negateC = op.RawOpCode.Extract(49); + + Operand srcA = GetSrcA(context); + + Operand srcB = context.FPNegate(GetSrcB(context), negateB); + Operand srcC = context.FPNegate(GetSrcC(context), negateC); + + Operand dest = GetDest(context); + + context.Copy(dest, context.FPSaturate(context.FPFusedMultiplyAdd(srcA, srcB, srcC), op.Saturate)); + + SetFPZnFlags(context, dest, op.SetCondCode); + } + + public static void Fmnmx(EmitterContext context) + { + IOpCodeFArith op = (IOpCodeFArith)context.CurrOp; + + bool absoluteA = op.AbsoluteA; + bool negateB = op.RawOpCode.Extract(45); + bool negateA = op.RawOpCode.Extract(48); + bool absoluteB = op.RawOpCode.Extract(49); + + Operand srcA = context.FPAbsNeg(GetSrcA(context), absoluteA, negateA); + Operand srcB = context.FPAbsNeg(GetSrcB(context), absoluteB, negateB); + + Operand resMin = context.FPMinimum(srcA, srcB); + Operand resMax = context.FPMaximum(srcA, srcB); + + Operand pred = GetPredicate39(context); + + Operand dest = GetDest(context); + + context.Copy(dest, context.ConditionalSelect(pred, resMin, resMax)); + + SetFPZnFlags(context, dest, op.SetCondCode); + } + + public static void Fmul(EmitterContext context) + { + IOpCodeFArith op = (IOpCodeFArith)context.CurrOp; + + bool negateB = !(op is OpCodeFArithImm32) && op.RawOpCode.Extract(48); + + Operand srcA = GetSrcA(context); + + Operand srcB = context.FPNegate(GetSrcB(context), negateB); + + switch (op.Scale) + { + case FmulScale.None: break; + + case FmulScale.Divide2: srcA = context.FPDivide (srcA, ConstF(2)); break; + case FmulScale.Divide4: srcA = context.FPDivide (srcA, ConstF(4)); break; + case FmulScale.Divide8: srcA = context.FPDivide (srcA, ConstF(8)); break; + case FmulScale.Multiply2: srcA = context.FPMultiply(srcA, ConstF(2)); break; + case FmulScale.Multiply4: srcA = context.FPMultiply(srcA, ConstF(4)); break; + case FmulScale.Multiply8: srcA = context.FPMultiply(srcA, ConstF(8)); break; + + default: break; //TODO: Warning. + } + + Operand dest = GetDest(context); + + context.Copy(dest, context.FPSaturate(context.FPMultiply(srcA, srcB), op.Saturate)); + + SetFPZnFlags(context, dest, op.SetCondCode); + } + + public static void Fset(EmitterContext context) + { + OpCodeSet op = (OpCodeSet)context.CurrOp; + + Condition cmpOp = (Condition)op.RawOpCode.Extract(48, 4); + + bool negateA = op.RawOpCode.Extract(43); + bool absoluteB = op.RawOpCode.Extract(44); + bool boolFloat = op.RawOpCode.Extract(52); + bool negateB = op.RawOpCode.Extract(53); + bool absoluteA = op.RawOpCode.Extract(54); + + Operand srcA = context.FPAbsNeg(GetSrcA(context), absoluteA, negateA); + Operand srcB = context.FPAbsNeg(GetSrcB(context), absoluteB, negateB); + + Operand res = GetFPComparison(context, cmpOp, srcA, srcB); + + Operand pred = GetPredicate39(context); + + res = GetPredLogicalOp(context, op.LogicalOp, res, pred); + + Operand dest = GetDest(context); + + if (boolFloat) + { + context.Copy(dest, context.ConditionalSelect(res, ConstF(1), Const(0))); + } + else + { + context.Copy(dest, res); + } + + //TODO: CC, X + } + + public static void Fsetp(EmitterContext context) + { + OpCodeSet op = (OpCodeSet)context.CurrOp; + + Condition cmpOp = (Condition)op.RawOpCode.Extract(48, 4); + + bool absoluteA = op.RawOpCode.Extract(7); + bool negateA = op.RawOpCode.Extract(43); + bool absoluteB = op.RawOpCode.Extract(44); + + Operand srcA = context.FPAbsNeg (GetSrcA(context), absoluteA, negateA); + Operand srcB = context.FPAbsolute(GetSrcB(context), absoluteB); + + Operand p0Res = GetFPComparison(context, cmpOp, srcA, srcB); + + Operand p1Res = context.BitwiseNot(p0Res); + + Operand pred = GetPredicate39(context); + + p0Res = GetPredLogicalOp(context, op.LogicalOp, p0Res, pred); + p1Res = GetPredLogicalOp(context, op.LogicalOp, p1Res, pred); + + context.Copy(Register(op.Predicate3), p0Res); + context.Copy(Register(op.Predicate0), p1Res); + } + + public static void Hadd2(EmitterContext context) + { + Hadd2Hmul2Impl(context, isAdd: true); + } + + public static void Hfma2(EmitterContext context) + { + IOpCodeHfma op = (IOpCodeHfma)context.CurrOp; + + Operand[] srcA = GetHfmaSrcA(context); + Operand[] srcB = GetHfmaSrcB(context); + Operand[] srcC = GetHfmaSrcC(context); + + Operand[] res = new Operand[2]; + + for (int index = 0; index < res.Length; index++) + { + res[index] = context.FPFusedMultiplyAdd(srcA[index], srcB[index], srcC[index]); + + res[index] = context.FPSaturate(res[index], op.Saturate); + } + + context.Copy(GetDest(context), GetHalfPacked(context, res)); + } + + public static void Hmul2(EmitterContext context) + { + Hadd2Hmul2Impl(context, isAdd: false); + } + + private static void Hadd2Hmul2Impl(EmitterContext context, bool isAdd) + { + OpCode op = context.CurrOp; + + bool saturate = op.RawOpCode.Extract(op is OpCodeAluImm32 ? 52 : 32); + + Operand[] srcA = GetHalfSrcA(context); + Operand[] srcB = GetHalfSrcB(context); + + Operand[] res = new Operand[2]; + + for (int index = 0; index < res.Length; index++) + { + if (isAdd) + { + res[index] = context.FPAdd(srcA[index], srcB[index]); + } + else + { + res[index] = context.FPMultiply(srcA[index], srcB[index]); + } + + res[index] = context.FPSaturate(res[index], saturate); + } + + context.Copy(GetDest(context), GetHalfPacked(context, res)); + } + + public static void Mufu(EmitterContext context) + { + IOpCodeFArith op = (IOpCodeFArith)context.CurrOp; + + bool negateB = op.RawOpCode.Extract(48); + + Operand res = context.FPAbsNeg(GetSrcA(context), op.AbsoluteA, negateB); + + MufuOperation subOp = (MufuOperation)context.CurrOp.RawOpCode.Extract(20, 4); + + switch (subOp) + { + case MufuOperation.Cosine: + res = context.FPCosine(res); + break; + + case MufuOperation.Sine: + res = context.FPSine(res); + break; + + case MufuOperation.ExponentB2: + res = context.FPExponentB2(res); + break; + + case MufuOperation.LogarithmB2: + res = context.FPLogarithmB2(res); + break; + + case MufuOperation.Reciprocal: + res = context.FPReciprocal(res); + break; + + case MufuOperation.ReciprocalSquareRoot: + res = context.FPReciprocalSquareRoot(res); + break; + + case MufuOperation.SquareRoot: + res = context.FPSquareRoot(res); + break; + + default: /* TODO */ break; + } + + context.Copy(GetDest(context), context.FPSaturate(res, op.Saturate)); + } + + private static Operand GetFPComparison( + EmitterContext context, + Condition cond, + Operand srcA, + Operand srcB) + { + Operand res; + + if (cond == Condition.Always) + { + res = Const(IrConsts.True); + } + else if (cond == Condition.Never) + { + res = Const(IrConsts.False); + } + else if (cond == Condition.Nan || cond == Condition.Number) + { + res = context.BitwiseOr(context.IsNan(srcA), context.IsNan(srcB)); + + if (cond == Condition.Number) + { + res = context.BitwiseNot(res); + } + } + else + { + Instruction inst; + + switch (cond & ~Condition.Nan) + { + case Condition.Less: inst = Instruction.CompareLess; break; + case Condition.Equal: inst = Instruction.CompareEqual; break; + case Condition.LessOrEqual: inst = Instruction.CompareLessOrEqual; break; + case Condition.Greater: inst = Instruction.CompareGreater; break; + case Condition.NotEqual: inst = Instruction.CompareNotEqual; break; + case Condition.GreaterOrEqual: inst = Instruction.CompareGreaterOrEqual; break; + + default: throw new InvalidOperationException($"Unexpected condition \"{cond}\"."); + } + + res = context.Add(inst | Instruction.FP, Local(), srcA, srcB); + + if ((cond & Condition.Nan) != 0) + { + res = context.BitwiseOr(res, context.IsNan(srcA)); + res = context.BitwiseOr(res, context.IsNan(srcB)); + } + } + + return res; + } + + private static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC) + { + if (setCC) + { + context.Copy(GetZF(context), context.FPCompareEqual(dest, ConstF(0))); + context.Copy(GetNF(context), context.FPCompareLess (dest, ConstF(0))); + } + } + + private static Operand[] GetHfmaSrcA(EmitterContext context) + { + IOpCodeHfma op = (IOpCodeHfma)context.CurrOp; + + return GetHalfSources(context, GetSrcA(context), op.SwizzleA); + } + + private static Operand[] GetHfmaSrcB(EmitterContext context) + { + IOpCodeHfma op = (IOpCodeHfma)context.CurrOp; + + Operand[] operands = GetHalfSources(context, GetSrcB(context), op.SwizzleB); + + return FPAbsNeg(context, operands, false, op.NegateB); + } + + private static Operand[] GetHfmaSrcC(EmitterContext context) + { + IOpCodeHfma op = (IOpCodeHfma)context.CurrOp; + + Operand[] operands = GetHalfSources(context, GetSrcC(context), op.SwizzleC); + + return FPAbsNeg(context, operands, false, op.NegateC); + } + } +}
\ No newline at end of file |
