aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
diff options
context:
space:
mode:
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdCvt32.cs')
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdCvt32.cs443
1 files changed, 375 insertions, 68 deletions
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
index 6ab089cb..4f2139a4 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -1,9 +1,11 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
+using static ARMeilleure.Instructions.InstEmitHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
@@ -63,21 +65,56 @@ namespace ARMeilleure.Instructions
if (toInteger)
{
- EmitVectorUnaryOpF32(context, (op1) =>
+ if (Optimizations.UseSse41)
{
- return EmitSaturateFloatToInt(context, op1, unsigned);
- });
+ EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned);
+ }
+ else
+ {
+ EmitVectorUnaryOpF32(context, (op1) =>
+ {
+ return EmitSaturateFloatToInt(context, op1, unsigned);
+ });
+ }
}
else
{
- if (unsigned)
+ if (Optimizations.UseSse2)
{
- EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
- }
+ EmitVectorUnaryOpSimd32(context, (n) =>
+ {
+ if (unsigned)
+ {
+ Operand mask = X86GetAllElements(context, 0x47800000);
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
+ res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
+ res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
+
+ Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
+ res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
+
+ return context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
+ }
+ else
+ {
+ return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
+ }
+ });
+ }
else
{
- EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
+ if (unsigned)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false));
+ }
+ else
+ {
+ EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true));
+ }
}
+
}
}
@@ -123,44 +160,51 @@ namespace ARMeilleure.Instructions
bool unsigned = (op.Opc2 & 1) == 0;
bool roundWithFpscr = op.Opc != 1;
- Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+ if (!roundWithFpscr && Optimizations.UseSse41)
+ {
+ EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned);
+ }
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
- Operand asInteger;
+ Operand asInteger;
- // TODO: Fast Path.
- if (roundWithFpscr)
- {
- // These need to get the FPSCR value, so it's worth noting we'd need to do a c# call at some point.
- if (floatSize == OperandType.FP64)
+ // TODO: Fast Path.
+ if (roundWithFpscr)
{
- if (unsigned)
+ if (floatSize == OperandType.FP64)
{
- asInteger = context.Call(new _U32_F64(SoftFallback.DoubleToUInt32), toConvert);
- }
+ if (unsigned)
+ {
+ asInteger = context.Call(new _U32_F64(SoftFallback.DoubleToUInt32), toConvert);
+ }
+ else
+ {
+ asInteger = context.Call(new _S32_F64(SoftFallback.DoubleToInt32), toConvert);
+ }
+
+ }
else
{
- asInteger = context.Call(new _S32_F64(SoftFallback.DoubleToInt32), toConvert);
+ if (unsigned)
+ {
+ asInteger = context.Call(new _U32_F32(SoftFallback.FloatToUInt32), toConvert);
+ }
+ else
+ {
+ asInteger = context.Call(new _S32_F32(SoftFallback.FloatToInt32), toConvert);
+ }
}
- }
+ }
else
{
- if (unsigned)
- {
- asInteger = context.Call(new _U32_F32(SoftFallback.FloatToUInt32), toConvert);
- }
- else
- {
- asInteger = context.Call(new _S32_F32(SoftFallback.FloatToInt32), toConvert);
- }
+ // Round towards zero.
+ asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
}
- }
- else
- {
- // Round towards zero.
- asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
- }
- InsertScalar(context, op.Vd, asInteger);
+ InsertScalar(context, op.Vd, asInteger);
+ }
}
else
{
@@ -192,6 +236,26 @@ namespace ARMeilleure.Instructions
return context.Call(dlg, n, Const((int)roundMode));
}
+ // Maps the two-bit rounding-mode field "rm" (as extracted by Vcvt_R and Vrint_RM)
+ // onto the FPRoundingMode handed to the SSE emitters:
+ //   0b01 -> ToNearest, 0b10 -> TowardsPlusInfinity, 0b11 -> TowardsMinusInfinity.
+ // Any other value, including 0b00 (the "Away" mode the callers handle themselves),
+ // is rejected with an ArgumentOutOfRangeException.
+ private static FPRoundingMode RMToRoundMode(int rm)
+ {
+     switch (rm)
+     {
+         case 0b01: return FPRoundingMode.ToNearest;
+         case 0b10: return FPRoundingMode.TowardsPlusInfinity;
+         case 0b11: return FPRoundingMode.TowardsMinusInfinity;
+     }
+
+     // No mapping exists for the remaining encodings.
+     throw new ArgumentOutOfRangeException(nameof(rm));
+ }
+
public static void Vcvt_R(ArmEmitterContext context) // Emits a scalar float -> 32-bit integer conversion whose rounding mode is selected by the low two bits of op.Opc2.
{
OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
@@ -199,30 +263,38 @@ namespace ARMeilleure.Instructions
OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
bool unsigned = (op.Opc & 1) == 0; // Bit 0 of Opc clear selects the unsigned result.
+ int rm = op.Opc2 & 3; // Only the low two bits of Opc2 are used as the rounding mode.
- Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
-
- switch (op.Opc2)
+ if (Optimizations.UseSse41 && rm != 0b00) // SSE4.1 fast path; rm == 0b00 (Away) has no RMToRoundMode mapping, so it always takes the fallback below.
{
- case 0b00: // Away
- toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
- break;
- case 0b01: // Nearest
- toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
- break;
- case 0b10: // Towards positive infinity
- toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert);
- break;
- case 0b11: // Towards negative infinity
- toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert);
- break;
+ EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned);
}
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+ // Fallback: round through Math/MathF calls first, then saturate to a 32-bit integer and write it to op.Vd.
+ switch (rm)
+ {
+ case 0b00: // Away
+ toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
+ break;
+ case 0b01: // Nearest
+ toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
+ break;
+ case 0b10: // Towards positive infinity
+ toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert);
+ break;
+ case 0b11: // Towards negative infinity
+ toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert);
+ break;
+ }
- Operand asInteger;
+ Operand asInteger;
- asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
+ asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned);
- InsertScalar(context, op.Vd, asInteger);
+ InsertScalar(context, op.Vd, asInteger);
+ }
}
public static void Vrint_RM(ArmEmitterContext context) // Emits a scalar round-to-integral (the result stays floating-point) using the rounding mode in the low two bits of op.Opc2.
@@ -231,30 +303,59 @@ namespace ARMeilleure.Instructions
OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
- Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+ int rm = op.Opc2 & 3;
- switch (op.Opc2)
+ if (Optimizations.UseSse41 && rm != 0b00) // ROUNDSS/ROUNDSD are SSE4.1 instructions, so the fast path is gated on SSE4.1 (not SSE2); rm == 0b00 (Away) always uses the fallback.
{
- case 0b00: // Away
- toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
- break;
- case 0b01: // Nearest
- toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
- break;
- case 0b10: // Towards positive infinity
- toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert);
- break;
- case 0b11: // Towards negative infinity
- toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert);
- break;
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd; // Size bit 0 clear -> scalar single, set -> scalar double.
+ // RMToRoundMode only maps 0b01/0b10/0b11; 0b00 cannot reach this point because of the guard above.
+ FPRoundingMode roundMode = RMToRoundMode(rm);
+
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode)));
+ });
}
+ else
+ {
+ Operand toConvert = ExtractScalar(context, floatSize, op.Vm);
+ // Software fallback: round through Math/MathF calls and write the still-floating-point result to op.Vd.
+ switch (rm)
+ {
+ case 0b00: // Away
+ toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert);
+ break;
+ case 0b01: // Nearest
+ toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert);
+ break;
+ case 0b10: // Towards positive infinity
+ toConvert = EmitUnaryMathCall(context, MathF.Ceiling, Math.Ceiling, toConvert);
+ break;
+ case 0b11: // Towards negative infinity
+ toConvert = EmitUnaryMathCall(context, MathF.Floor, Math.Floor, toConvert);
+ break;
+ }
- InsertScalar(context, op.Vd, toConvert);
+ InsertScalar(context, op.Vd, toConvert);
+ }
}
public static void Vrint_Z(ArmEmitterContext context) // Emits a scalar round-towards-zero that keeps the result in floating-point form.
{
- EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1));
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+ // ROUNDSS/ROUNDSD were introduced with SSE4.1, so the intrinsic path is only valid when SSE4.1 is available (SSE2 alone is not enough).
+ if (Optimizations.UseSse41)
+ {
+ EmitScalarUnaryOpSimd32(context, (m) =>
+ {
+ Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd; // Size bit 0 clear -> scalar single, set -> scalar double.
+ return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero)));
+ });
+ }
+ else
+ {
+ EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, MathF.Truncate, Math.Truncate, op1)); // Software fallback through MathF.Truncate / Math.Truncate.
+ }
}
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed)
@@ -270,5 +371,211 @@ namespace ARMeilleure.Instructions
return context.ConvertToFPUI(type, value);
}
}
+
+ private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
+ {
+ // A port of the similar round function in InstEmitSimdCvt. Rounds the scalar selected by op.Vm with roundMode, converts it to a 32-bit integer (signed or unsigned per "signed") and writes the result to op.Vd; callers use it in place of their EmitSaturateFloatToInt fallback.
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+ // Size bit 0 selects double precision. op.Vm is shifted down to pick the vector register (by 2 for single, by 1 for double); presumably a vector register holds 4 singles or 2 doubles - confirm against GetVecA32/EmitSwapScalar.
+ bool doubleSize = (op.Size & 1) != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand n = GetVecA32(op.Vm >> shift);
+ n = EmitSwapScalar(context, n, op.Vm, doubleSize);
+ // Both branches below run the same sequence; only the intrinsic width and the encoding of the 2^31 constant differ.
+ if (!doubleSize)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+ // nRes is n ANDed with its ordered self-compare mask (a NaN compares unordered, so it is cleared to zero). Round it with the requested mode.
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ // Unsigned only: clear values that are not greater than zero.
+ Operand nCmp;
+ Operand nIntOrLong2 = null;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+ // First conversion. NOTE(review): the fix-ups below appear to rely on CVTSS2SI returning 0x80000000 when the value does not fit - confirm against the x86 reference.
+ Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
+ // Unsigned only: subtract 2^31, clear non-positive remainders and convert that remainder separately.
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes);
+ }
+ // Build a mask for values that are still not less than 2^31 and move it into an integer register.
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
+ // Signed: XOR the first conversion with the mask. Unsigned: XOR the second conversion with the mask, then add the first conversion.
+ Operand dRes;
+ if (signed)
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
+ }
+ else
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
+ dRes = context.Add(dRes, nIntOrLong);
+ }
+
+ InsertScalar(context, op.Vd, dRes);
+ }
+ else
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+ // Same sequence as the single-precision branch, using the scalar-double intrinsics.
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+
+ Operand nCmp;
+ Operand nIntOrLong2 = null;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648)
+
+ Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
+
+ Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
+
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+ // The compare mask is read out as a 64-bit integer and truncated to 32 bits before the final fix-up.
+ Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
+ nLong = context.ConvertI64ToI32(nLong);
+
+ Operand dRes;
+ if (signed)
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
+ }
+ else
+ {
+ dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
+ dRes = context.Add(dRes, nIntOrLong);
+ }
+
+ InsertScalar(context, op.Vd, dRes);
+ }
+ }
+
+ private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+ // Vector counterpart of the scalar SSE4.1 conversion: the lambda receives the source vector n and returns the converted vector, which EmitVectorUnaryOpSimd32 is handed as the per-instruction emitter.
+ EmitVectorUnaryOpSimd32(context, (n) =>
+ {
+ int sizeF = op.Size & 1;
+ // sizeF == 0: packed single precision -> 32-bit integer lanes.
+ if (sizeF == 0)
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+ // nRes is n ANDed with its ordered self-compare mask (NaN lanes compare unordered, so they are cleared to zero). Round with the requested mode.
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ Operand nCmp;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648)
+ // First conversion; unsigned additionally converts (value - 2^31) with non-positive lanes cleared.
+ Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+ Operand nInt2 = null;
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
+ }
+ // Mask of lanes that are still not less than 2^31; it is XORed into the converted integers below.
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ if (signed)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
+ }
+ else
+ {
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
+ return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
+ // NOTE(review): this branch works on packed doubles, compares against 2^63 and combines 64-bit lanes (Paddq), i.e. it yields 64-bit rather than 32-bit results - confirm whether it is reachable for the callers of this function.
+ nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
+
+ Operand zero = context.VectorZero();
+ Operand nCmp;
+ if (!signed)
+ {
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+ }
+
+ Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808)
+
+ Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
+ Operand nLong2 = null;
+ if (!signed)
+ {
+ nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
+
+ nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
+ nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
+
+ nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false);
+ }
+
+ nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
+
+ if (signed)
+ {
+ return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
+ }
+ else
+ {
+ Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
+ return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
+ }
+ }
+ });
+ }
}
}