// Source file: Ryujinx.Graphics/Shader/Instructions/InstEmitConversion.cs
//
// Introduced by commit 6b23a2c125b9c48b5ebea92716004ef68698bb0f
// From: gdkchan
// Date: Wed, 17 Apr 2019 20:57:08 -0300
// Subject: New shader translator implementation (#654)
//
// * Start implementing a new shader translator
// * Fix shift instructions and a typo
// * Small refactoring on StructuredProgram, move RemovePhis method to a separate class
// * Initial geometry shader support
// * Implement TLD4
// * Fix -- There's no negation on FMUL32I
// * Add constant folding and algebraic simplification optimizations, nits
// * Some leftovers from constant folding
// * Avoid cast for constant assignments
// * Add a branch elimination pass, and misc small fixes
// * Remove redundant branches, add expression propagation and other improvements on the code
// * Small leftovers -- add missing break and continue, remove unused properties, other improvements
// * Add null check to handle empty block cases on block visitor
// * Add HADD2 and HMUL2 half float shader instructions
// * Optimize pack/unpack sequences, some fixes related to half float instructions
// * Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures,
//   some refactoring on codegen
// * Fix copy paste mistake that caused RZ to be ignored on the AST instruction
// * Add workaround for conditional exit, and fix half float instruction with constant buffer
// * Add missing 0.0 source for TLDS.LZ variants
// * Simplify the switch for TLDS.LZ
// * Texture instructions related fixes
// * Implement the HFMA instruction, and some misc. fixes
// * Enable constant folding on UnpackHalf2x16 instructions
// * Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
// * Remove the old shader translator
// * Remove ShaderDeclInfo and other unused things
// * Add dual vertex shader support
// * Add ShaderConfig, used to pass shader type and maximum cbuffer size
// * Move and rename some instruction enums
// * Move texture instructions into a separate file
// * Move operand GetExpression and locals management to OperandManager
// * Optimize opcode decoding using a simple list and binary search
// * Add missing condition for do-while on goto elimination
// * Misc. fixes on texture instructions
// * Simplify TLDS switch
// * Address PR feedback, and a nit

using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the F2F (float to float conversion) instruction.
        /// </summary>
        /// <remarks>
        /// The destination format is read from bits 8-9 and the source format from bits 10-11,
        /// the same layout used by F2I, I2F and I2I in this file.
        /// </remarks>
        /// <param name="context">Emitter context holding the instruction being translated</param>
        public static void F2F(EmitterContext context)
        {
            OpCodeFArith op = (OpCodeFArith)context.CurrOp;

            FPType dstType = (FPType)op.RawOpCode.Extract(8,  2);
            FPType srcType = (FPType)op.RawOpCode.Extract(10, 2);

            bool pass      = op.RawOpCode.Extract(40);
            bool negateB   = op.RawOpCode.Extract(45);
            bool absoluteB = op.RawOpCode.Extract(49);

            //The rounding step is only skipped when bit 40 is set and the rounding
            //mode field selects TowardsNegativeInfinity.
            //NOTE(review): confirm that this bit/mode combination is the intended "pass" encoding.
            pass &= op.RoundingMode == RoundingMode.TowardsNegativeInfinity;

            Operand srcB = context.FPAbsNeg(GetSrcB(context, srcType), absoluteB, negateB);

            if (!pass)
            {
                //Rounding modes not listed here leave the value untouched.
                switch (op.RoundingMode)
                {
                    case RoundingMode.TowardsNegativeInfinity:
                        srcB = context.FPFloor(srcB);
                        break;

                    case RoundingMode.TowardsPositiveInfinity:
                        srcB = context.FPCeiling(srcB);
                        break;

                    case RoundingMode.TowardsZero:
                        srcB = context.FPTruncate(srcB);
                        break;
                }
            }

            srcB = context.FPSaturate(srcB, op.Saturate);

            WriteFP(context, dstType, srcB);

            //TODO: CC.
        }

        /// <summary>
        /// Emits the F2I (float to integer conversion) instruction.
        /// </summary>
        /// <remarks>
        /// The source is rounded according to the rounding mode, converted to a signed
        /// 32-bits integer, and then clamped to the destination range for every
        /// destination type other than S32.
        /// </remarks>
        /// <param name="context">Emitter context holding the instruction being translated</param>
        public static void F2I(EmitterContext context)
        {
            OpCodeFArith op = (OpCodeFArith)context.CurrOp;

            IntegerType intType = (IntegerType)op.RawOpCode.Extract(8, 2);

            FPType floatType = (FPType)op.RawOpCode.Extract(10, 2);

            bool isSignedInt = op.RawOpCode.Extract(12);
            bool negateB     = op.RawOpCode.Extract(45);
            bool absoluteB   = op.RawOpCode.Extract(49);

            if (isSignedInt)
            {
                //The 2 bits size field is combined with the S8 flag to form the signed type.
                intType |= IntegerType.S8;
            }

            Operand srcB = context.FPAbsNeg(GetSrcB(context, floatType), absoluteB, negateB);

            //Rounding modes not listed here leave the value untouched.
            switch (op.RoundingMode)
            {
                case RoundingMode.TowardsNegativeInfinity:
                    srcB = context.FPFloor(srcB);
                    break;

                case RoundingMode.TowardsPositiveInfinity:
                    srcB = context.FPCeiling(srcB);
                    break;

                case RoundingMode.TowardsZero:
                    srcB = context.FPTruncate(srcB);
                    break;
            }

            srcB = context.FPConvertToS32(srcB);

            //TODO: S/U64, conversion overflow handling.
            if (intType != IntegerType.S32)
            {
                int min = GetIntMin(intType);
                int max = GetIntMax(intType);

                srcB = isSignedInt
                    ? context.IClampS32(srcB, Const(min), Const(max))
                    : context.IClampU32(srcB, Const(min), Const(max));
            }

            Operand dest = GetDest(context);

            context.Copy(dest, srcB);

            //TODO: CC.
        }

        /// <summary>
        /// Emits the I2F (integer to float conversion) instruction.
        /// </summary>
        /// <remarks>
        /// For 8 and 16-bits sources, the value is extracted from the source register
        /// starting at the bit offset <c>ByteSelection * 8</c>.
        /// </remarks>
        /// <param name="context">Emitter context holding the instruction being translated</param>
        public static void I2F(EmitterContext context)
        {
            OpCodeAlu op = (OpCodeAlu)context.CurrOp;

            FPType dstType = (FPType)op.RawOpCode.Extract(8, 2);

            IntegerType srcType = (IntegerType)op.RawOpCode.Extract(10, 2);

            bool isSmallInt = srcType <= IntegerType.U16;

            bool isSignedInt = op.RawOpCode.Extract(13);
            bool negateB     = op.RawOpCode.Extract(45);
            bool absoluteB   = op.RawOpCode.Extract(49);

            //NOTE(review): abs/neg is applied to the whole register before the small integer
            //is extracted, while I2I extracts first -- confirm which order is intended.
            Operand srcB = context.IAbsNeg(GetSrcB(context), absoluteB, negateB);

            if (isSmallInt)
            {
                int size = srcType == IntegerType.U16 ? 16 : 8;

                srcB = isSignedInt
                    ? context.BitfieldExtractS32(srcB, Const(op.ByteSelection * 8), Const(size))
                    : context.BitfieldExtractU32(srcB, Const(op.ByteSelection * 8), Const(size));
            }

            srcB = isSignedInt
                ? context.IConvertS32ToFP(srcB)
                : context.IConvertU32ToFP(srcB);

            WriteFP(context, dstType, srcB);

            //TODO: CC.
        }

        /// <summary>
        /// Emits the I2I (integer to integer conversion) instruction.
        /// </summary>
        /// <remarks>
        /// For 8 and 16-bits sources, the value is extracted from the source register
        /// starting at the bit offset <c>ByteSelection * 8</c>, then abs/neg is applied.
        /// The result is only clamped to the destination range when saturation is enabled.
        /// </remarks>
        /// <param name="context">Emitter context holding the instruction being translated</param>
        public static void I2I(EmitterContext context)
        {
            OpCodeAlu op = (OpCodeAlu)context.CurrOp;

            IntegerType dstType = (IntegerType)op.RawOpCode.Extract(8,  2);
            IntegerType srcType = (IntegerType)op.RawOpCode.Extract(10, 2);

            if (srcType == IntegerType.U64 || dstType == IntegerType.U64)
            {
                //TODO: Warning. This instruction doesn't support 64-bits integers
            }

            bool srcIsSmallInt = srcType <= IntegerType.U16;

            bool dstIsSignedInt = op.RawOpCode.Extract(12);
            bool srcIsSignedInt = op.RawOpCode.Extract(13);
            bool negateB        = op.RawOpCode.Extract(45);
            bool absoluteB      = op.RawOpCode.Extract(49);

            Operand srcB = GetSrcB(context);

            if (srcIsSmallInt)
            {
                int size = srcType == IntegerType.U16 ? 16 : 8;

                srcB = srcIsSignedInt
                    ? context.BitfieldExtractS32(srcB, Const(op.ByteSelection * 8), Const(size))
                    : context.BitfieldExtractU32(srcB, Const(op.ByteSelection * 8), Const(size));
            }

            srcB = context.IAbsNeg(srcB, absoluteB, negateB);

            if (op.Saturate)
            {
                if (dstIsSignedInt)
                {
                    //The 2 bits size field is combined with the S8 flag to form the signed type.
                    dstType |= IntegerType.S8;
                }

                int min = GetIntMin(dstType);
                int max = GetIntMax(dstType);

                srcB = dstIsSignedInt
                    ? context.IClampS32(srcB, Const(min), Const(max))
                    : context.IClampU32(srcB, Const(min), Const(max));
            }

            context.Copy(GetDest(context), srcB);

            //TODO: CC.
        }

        /// <summary>
        /// Writes a floating-point result to the destination of the current instruction.
        /// </summary>
        /// <remarks>
        /// FP32 values are copied as is. FP16 values are packed together with a constant 0
        /// using PackHalf2x16. Nothing is written for any other type.
        /// </remarks>
        /// <param name="context">Emitter context holding the instruction being translated</param>
        /// <param name="type">Floating-point format of the destination</param>
        /// <param name="srcB">Value to be written</param>
        private static void WriteFP(EmitterContext context, FPType type, Operand srcB)
        {
            Operand dest = GetDest(context);

            if (type == FPType.FP32)
            {
                context.Copy(dest, srcB);
            }
            else if (type == FPType.FP16)
            {
                context.Copy(dest, context.PackHalf2x16(srcB, ConstF(0)));
            }
            else
            {
                //TODO.
            }
        }
    }
}