diff options
Diffstat (limited to 'ARMeilleure/Instructions/InstEmitSimdCmp32.cs')
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdCmp32.cs | 166 |
1 files changed, 154 insertions, 12 deletions
diff --git a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs index 3b2483ce..a4f64ad6 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCmp32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCmp32.cs @@ -5,6 +5,7 @@ using ARMeilleure.Translation; using System; using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; using static ARMeilleure.Instructions.InstEmitSimdHelper32; using static ARMeilleure.IntermediateRepresentation.OperandHelper; @@ -16,7 +17,14 @@ namespace ARMeilleure.Instructions { public static void Vceq_V(ArmEmitterContext context) { - EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2CmpOpF32(context, CmpCondition.Equal, false); + } + else + { + EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, false); + } } public static void Vceq_I(ArmEmitterContext context) @@ -30,7 +38,14 @@ namespace ARMeilleure.Instructions if (op.F) { - EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2CmpOpF32(context, CmpCondition.Equal, true); + } + else + { + EmitCmpOpF32(context, SoftFloat32.FPCompareEQFpscr, SoftFloat64.FPCompareEQFpscr, true); + } } else { @@ -40,7 +55,14 @@ namespace ARMeilleure.Instructions public static void Vcge_V(ArmEmitterContext context) { - EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, false); + } + else + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, false); + } } public static void Vcge_I(ArmEmitterContext context) @@ -56,7 +78,14 @@ namespace ARMeilleure.Instructions if (op.F) { - EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2CmpOpF32(context, CmpCondition.GreaterThanOrEqual, true); + } + else + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGEFpscr, SoftFloat64.FPCompareGEFpscr, true); + } } else { @@ -66,7 +95,14 @@ namespace ARMeilleure.Instructions public static void Vcgt_V(ArmEmitterContext context) { - EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, false); + } + else + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, false); + } } public static void Vcgt_I(ArmEmitterContext context) @@ -82,7 +118,14 @@ namespace ARMeilleure.Instructions if (op.F) { - EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2CmpOpF32(context, CmpCondition.GreaterThan, true); + } + else + { + EmitCmpOpF32(context, SoftFloat32.FPCompareGTFpscr, SoftFloat64.FPCompareGTFpscr, true); + } } else { @@ -96,7 +139,14 @@ namespace ARMeilleure.Instructions if (op.F) { - EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2CmpOpF32(context, CmpCondition.LessThanOrEqual, true); + } + else + { + EmitCmpOpF32(context, SoftFloat32.FPCompareLEFpscr, SoftFloat64.FPCompareLEFpscr, true); + } } else { @@ -110,7 +160,14 @@ namespace ARMeilleure.Instructions if (op.F) { - EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2CmpOpF32(context, CmpCondition.LessThan, true); + } + else + { + EmitCmpOpF32(context, SoftFloat32.FPCompareLTFpscr, SoftFloat64.FPCompareLTFpscr, true); + } } else { @@ -224,23 +281,77 @@ namespace ARMeilleure.Instructions OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; bool cmpWithZero = (op.Opc & 2) != 0; + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2)) + { + CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ; + + bool doubleSize = sizeF != 0; + int shift = doubleSize ? 1 : 2; + Operand m = GetVecA32(op.Vm >> shift); + Operand n = GetVecA32(op.Vd >> shift); + + n = EmitSwapScalar(context, n, op.Vd, doubleSize); + m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize); + + Operand lblNaN = Label(); + Operand lblEnd = Label(); + + if (!doubleSize) + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered)); + + Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m); + + EmitSetFPSCRFlags(context, nf, zf, cf, Const(0)); + } + else + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered)); + + Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m); + + EmitSetFPSCRFlags(context, nf, zf, cf, Const(0)); + } + + context.Branch(lblEnd); + + context.MarkLabel(lblNaN); + + EmitSetFPSCRFlags(context, Const(3)); + + context.MarkLabel(lblEnd); + } + else { - int fSize = op.Size & 1; - OperandType type = fSize != 0 ? OperandType.FP64 : OperandType.FP32; + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; Operand ne = ExtractScalar(context, type, op.Vd); Operand me; if (cmpWithZero) { - me = fSize == 0 ? ConstF(0f) : ConstF(0d); + me = sizeF == 0 ? ConstF(0f) : ConstF(0d); } else { me = ExtractScalar(context, type, op.Vm); } - Delegate dlg = fSize != 0 + Delegate dlg = sizeF != 0 ? (Delegate)new _S32_F64_F64_Bool(SoftFloat64.FPCompare) : (Delegate)new _S32_F32_F32_Bool(SoftFloat32.FPCompare); @@ -269,5 +380,36 @@ namespace ARMeilleure.Instructions SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2)); SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3)); } + + private static void EmitSetFPSCRFlags(ArmEmitterContext context, Operand n, Operand z, Operand c, Operand v) + { + SetFpFlag(context, FPState.VFlag, v); + SetFpFlag(context, FPState.CFlag, c); + SetFpFlag(context, FPState.ZFlag, z); + SetFpFlag(context, FPState.NFlag, n); + } + + private static void EmitSse2CmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int sizeF = op.Size & 1; + Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd; + + if (zero) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond)); + }); + } + else + { + EmitVectorBinaryOpSimd32(context, (n, m) => + { + return context.AddIntrinsic(inst, n, m, Const((int)cond)); + }); + } + } } } |
