Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdCmp32.cs')
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdCmp32.cs  | 166
1 file changed, 154 insertions(+), 12 deletions(-)
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
index 3b2483ce..a4f64ad6 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs
@@ -5,6 +5,7 @@ using ARMeilleure.Translation;
using System;
using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.Instructions.InstEmitSimdHelper;
using static ARMeilleure.Instructions.InstEmitSimdHelper32;
using static ARMeilleure.IntermediateRepresentation.OperandHelper;
@@ -16,7 +17,14 @@ namespace ARMeilleure.Instructions
{
public static void Vceq_V(ArmEmitterContext context)
{
- EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2CmpOpF32(context, CmpCondition.Equal, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false);
+ }
}
public static void Vceq_I(ArmEmitterContext context)
@@ -30,7 +38,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
- EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2CmpOpF32(context, CmpCondition.Equal, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true);
+ }
}
else
{
@@ -40,7 +55,14 @@ namespace ARMeilleure.Instructions
public static void Vcge_V(ArmEmitterContext context)
{
- EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false);
+ }
}
public static void Vcge_I(ArmEmitterContext context)
@@ -56,7 +78,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
- EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true);
+ }
}
else
{
@@ -66,7 +95,14 @@ namespace ARMeilleure.Instructions
public static void Vcgt_V(ArmEmitterContext context)
{
- EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, false);
+ }
+ else
+ {
+ EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false);
+ }
}
public static void Vcgt_I(ArmEmitterContext context)
@@ -82,7 +118,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
- EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true);
+ }
}
else
{
@@ -96,7 +139,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
- EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2CmpOpF32(context, CmpCondition.LessThanOrEqual, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true);
+ }
}
else
{
@@ -110,7 +160,14 @@ namespace ARMeilleure.Instructions
if (op.F)
{
- EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ EmitSse2CmpOpF32(context, CmpCondition.LessThan, true);
+ }
+ else
+ {
+ EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true);
+ }
}
else
{
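Every vector-compare hunk above follows the same shape: when Optimizations.FastFP and SSE2 are available, the per-lane compare is emitted through the new EmitSse2CmpOpF32 helper added at the end of this diff; otherwise it falls back to the SoftFloat routines. A minimal sketch of the per-lane result both paths are meant to agree on (the class and method below are illustrative only, not part of the change):

    // Both the NEON compare and the x86 packed compare yield an all-ones
    // lane for true and an all-zeros lane for false; NaN operands compare
    // false for the equality/ordering predicates used here.
    static class LaneCompareSketch
    {
        public static uint GreaterThanOrEqual(float n, float m)
        {
            return n >= m ? 0xFFFFFFFFu : 0u;
        }
    }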
@@ -224,23 +281,77 @@ namespace ARMeilleure.Instructions
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
bool cmpWithZero = (op.Opc & 2) != 0;
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2))
+ {
+ CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ;
+
+ bool doubleSize = sizeF != 0;
+ int shift = doubleSize ? 1 : 2;
+ Operand m = GetVecA32(op.Vm >> shift);
+ Operand n = GetVecA32(op.Vd >> shift);
+
+ n = EmitSwapScalar(context, n, op.Vd, doubleSize);
+ m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize);
+
+ Operand lblNaN = Label();
+ Operand lblEnd = Label();
+
+ if (!doubleSize)
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m);
+
+ EmitSetFPSCRFlags(context, nf, zf, cf, Const(0));
+ }
+ else
+ {
+ Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered));
+
+ Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask);
+
+ context.BranchIfFalse(lblNaN, isOrdered);
+
+ Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m);
+ Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m);
+ Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m);
+
+ EmitSetFPSCRFlags(context, nf, zf, cf, Const(0));
+ }
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblNaN);
+
+ EmitSetFPSCRFlags(context, Const(3));
+
+ context.MarkLabel(lblEnd);
+ }
+ else
{
- int fSize = op.Size & 1;
- OperandType type = fSize != 0 ? OperandType.FP64 : OperandType.FP32;
+ OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32;
Operand ne = ExtractScalar(context, type, op.Vd);
Operand me;
if (cmpWithZero)
{
- me = fSize == 0 ? ConstF(0f) : ConstF(0d);
+ me = sizeF == 0 ? ConstF(0f) : ConstF(0d);
}
else
{
me = ExtractScalar(context, type, op.Vm);
}
- Delegate dlg = fSize != 0
+ Delegate dlg = sizeF != 0
? (Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare)
: (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare);
@@ -269,5 +380,36 @@ namespace ARMeilleure.Instructions
SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2));
SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3));
}
+
+ private static void EmitSetFPSCRFlags(ArmEmitterContext context, Operand n, Operand z, Operand c, Operand v)
+ {
+ SetFpFlag(context, FPState.VFlag, v);
+ SetFpFlag(context, FPState.CFlag, c);
+ SetFpFlag(context, FPState.ZFlag, z);
+ SetFpFlag(context, FPState.NFlag, n);
+ }
+
+ private static void EmitSse2CmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero)
+ {
+ OpCode32Simd op = (OpCode32Simd)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+ Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd;
+
+ if (zero)
+ {
+ EmitVectorUnaryOpSimd32(context, (m) =>
+ {
+ return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond));
+ });
+ }
+ else
+ {
+ EmitVectorBinaryOpSimd32(context, (n, m) =>
+ {
+ return context.AddIntrinsic(inst, n, m, Const((int)cond));
+ });
+ }
+ }
}
}
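For the scalar VCMP/VCMPE fast path, the COMISS/COMISD results are folded into FPSCR.NZCV following the ARM FPCompare mapping, assuming the standard encoding (unordered -> 0011, equal -> 0110, less than -> 1000, greater than -> 0010). A short sketch of that mapping; the helper name is illustrative:

    // The NaN branch in the diff sets Const(3), i.e. C and V, which is the
    // unordered case below; the ordered cases come from the Comiss/Comisd
    // ge/eq/lt intrinsics.
    static class FpCompareSketch
    {
        public static (int N, int Z, int C, int V) Nzcv(double n, double m)
        {
            if (double.IsNaN(n) || double.IsNaN(m)) return (0, 0, 1, 1); // unordered
            if (n == m) return (0, 1, 1, 0);                             // equal
            if (n < m)  return (1, 0, 0, 0);                             // less than
            return (0, 0, 1, 0);                                         // greater than
        }
    }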