diff options
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdCvt32.cs')
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdCvt32.cs | 443 |
1 files changed, 375 insertions, 68 deletions
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index 6ab089cb..4f2139a4 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -1,9 +1,11 @@ using ARMeilleure.Decoders; using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; using ARMeilleure.Translation; using System; using System.Diagnostics; +using static ARMeilleure.Instructions.InstEmitHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper32; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -63,21 +65,56 @@ namespace ARMeilleure.Instructions if (toInteger) { - EmitVectorUnaryOpF32(context, (op1) => + if (Optimizations.UseSse41) { - return EmitSaturateFloatToInt(context, op1, unsigned); - }); + EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned); + } + else + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSaturateFloatToInt(context, op1, unsigned); + }); + } } else { - if (unsigned) + if (Optimizations.UseSse2) { - EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false)); - } + EmitVectorUnaryOpSimd32(context, (n) => + { + if (unsigned) + { + Operand mask = X86GetAllElements(context, 0x47800000); + + Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16)); + res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res); + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16)); + res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16)); + res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2); + + return context.AddIntrinsic(Intrinsic.X86Addps, res, res2); + } + else + { + return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n); + } + }); + } else { - EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true)); + if (unsigned) + { + EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true)); + } } + } } @@ -123,44 +160,51 @@ namespace ARMeilleure.Instructions bool unsigned = (op.Opc2 & 1) == 0; bool roundWithFpscr = op.Opc != 1; - Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + if (!roundWithFpscr && Optimizations.UseSse41) + { + EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned); + } + else + { + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); - Operand asInteger; + Operand asInteger; - // TODO: Fast Path. - if (roundWithFpscr) - { - // These need to get the FPSCR value, so it's worth noting we'd need to do a c# call at some point. - if (floatSize == OperandType.FP64) + // TODO: Fast Path. + if (roundWithFpscr) { - if (unsigned) + if (floatSize == OperandType.FP64) { - asInteger = context.Call(new _U32_F64(SoftFallback.DoubleToUInt32), toConvert); - } + if (unsigned) + { + asInteger = context.Call(new _U32_F64(SoftFallback.DoubleToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F64(SoftFallback.DoubleToInt32), toConvert); + } + + } else { - asInteger = context.Call(new _S32_F64(SoftFallback.DoubleToInt32), toConvert); + if (unsigned) + { + asInteger = context.Call(new _U32_F32(SoftFallback.FloatToUInt32), toConvert); + } + else + { + asInteger = context.Call(new _S32_F32(SoftFallback.FloatToInt32), toConvert); + } } - } + } else { - if (unsigned) - { - asInteger = context.Call(new _U32_F32(SoftFallback.FloatToUInt32), toConvert); - } - else - { - asInteger = context.Call(new _S32_F32(SoftFallback.FloatToInt32), toConvert); - } + // Round towards zero. + asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); } - } - else - { - // Round towards zero. - asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); - } - InsertScalar(context, op.Vd, asInteger); + InsertScalar(context, op.Vd, asInteger); + } } else { @@ -192,6 +236,26 @@ namespace ARMeilleure.Instructions return context.Call(dlg, n, Const((int)roundMode)); } + private static FPRoundingMode RMToRoundMode(int rm) + { + FPRoundingMode roundMode; + switch (rm) + { + case 0b01: + roundMode = FPRoundingMode.ToNearest; + break; + case 0b10: + roundMode = FPRoundingMode.TowardsPlusInfinity; + break; + case 0b11: + roundMode = FPRoundingMode.TowardsMinusInfinity; + break; + default: + throw new ArgumentOutOfRangeException(nameof(rm)); + } + return roundMode; + } + public static void Vcvt_R(ArmEmitterContext context) { OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; @@ -199,30 +263,38 @@ namespace ARMeilleure.Instructions OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; bool unsigned = (op.Opc & 1) == 0; + int rm = op.Opc2 & 3; - Operand toConvert = ExtractScalar(context, floatSize, op.Vm); - - switch (op.Opc2) + if (Optimizations.UseSse41 && rm != 0b00) { - case 0b00: // Away - toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); - break; - case 0b01: // Nearest - toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); - break; - case 0b10: // Towards positive infinity - toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert); - break; - case 0b11: // Towards negative infinity - toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert); - break; + EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned); } + else + { + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (rm) + { + case 0b00: // Away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: // Nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: // Towards positive infinity + toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert); + break; + case 0b11: // Towards negative infinity + toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert); + break; + } - Operand asInteger; + Operand asInteger; - asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); + asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); - InsertScalar(context, op.Vd, asInteger); + InsertScalar(context, op.Vd, asInteger); + } } public static void Vrint_RM(ArmEmitterContext context) @@ -231,30 +303,59 @@ namespace ARMeilleure.Instructions OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; - Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + int rm = op.Opc2 & 3; - switch (op.Opc2) + if (Optimizations.UseSse2 && rm != 0b00) { - case 0b00: // Away - toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); - break; - case 0b01: // Nearest - toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); - break; - case 0b10: // Towards positive infinity - toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert); - break; - case 0b11: // Towards negative infinity - toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert); - break; + EmitScalarUnaryOpSimd32(context, (m) => + { + Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd; + + FPRoundingMode roundMode = RMToRoundMode(rm); + + return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode))); + }); } + else + { + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (rm) + { + case 0b00: // Away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: // Nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: // Towards positive infinity + toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert); + break; + case 0b11: // Towards negative infinity + toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert); + break; + } - InsertScalar(context, op.Vd, toConvert); + InsertScalar(context, op.Vd, toConvert); + } } public static void Vrint_Z(ArmEmitterContext context) { - EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1)); + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + if (Optimizations.UseSse2) + { + EmitScalarUnaryOpSimd32(context, (m) => + { + Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd; + return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero))); + }); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1)); + } } private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed) @@ -270,5 +371,211 @@ namespace ARMeilleure.Instructions return context.ConvertToFPUI(type, value); } } + + private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed) + { + // A port of the similar round function in InstEmitSimdCvt. + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vm >> shift); + n = EmitSwapScalar(context, n, op.Vm, doubleSize); + + if (!doubleSize) + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode))); + + Operand zero = context.VectorZero(); + + Operand nCmp; + Operand nIntOrLong2 = null; + if (!signed) + { + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + } + + int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648) + + Operand fpMaxValMask = X86GetScalar(context, fpMaxVal); + + Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes); + + if (!signed) + { + nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes); + } + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes); + + Operand dRes; + if (signed) + { + dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt); + } + else + { + dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt); + dRes = context.Add(dRes, nIntOrLong); + } + + InsertScalar(context, op.Vd, dRes); + } + else + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode))); + + Operand zero = context.VectorZero(); + + Operand nCmp; + Operand nIntOrLong2 = null; + if (!signed) + { + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + } + + long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648) + + Operand fpMaxValMask = X86GetScalar(context, fpMaxVal); + + Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes); + + if (!signed) + { + nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes); + } + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes); + nLong = context.ConvertI64ToI32(nLong); + + Operand dRes; + if (signed) + { + dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong); + } + else + { + dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong); + dRes = context.Add(dRes, nIntOrLong); + } + + InsertScalar(context, op.Vd, dRes); + } + } + + private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + EmitVectorUnaryOpSimd32(context, (n) => + { + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode))); + + Operand zero = context.VectorZero(); + Operand nCmp; + if (!signed) + { + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + } + + Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648) + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes); + Operand nInt2 = null; + if (!signed) + { + nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes); + } + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + if (signed) + { + return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes); + } + else + { + Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes); + return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt); + } + } + else /* if (sizeF == 1) */ + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode))); + + Operand zero = context.VectorZero(); + Operand nCmp; + if (!signed) + { + nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + } + + Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808) + + Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false); + Operand nLong2 = null; + if (!signed) + { + nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false); + } + + nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + if (signed) + { + return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes); + } + else + { + Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes); + return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong); + } + } + }); + } } } |
