aboutsummaryrefslogtreecommitdiff
path: root/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
diff options
context:
space:
mode:
author LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2018-12-26 18:11:36 +0100
committer gdkchan <gab.dark.100@gmail.com> 2018-12-26 15:11:36 -0200
commit 0f5b6dfbe8d4bcc4df3f670e366a967d8ea103db (patch)
tree 89fe781d39e9e02534fd455a26008db8a3a14341 /ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
parent d8f2497f155046402cd15c65eca0326faf3aefd6 (diff)
Fix Frecpe_S/V and Frsqrte_S/V (full FP emulation). Add SSE optimizations and SoftFloat implementations for the Fcmeq/ge/gt/le/lt_S/V (register and zero), Faddp_S/V, Fmaxp_V and Fminp_V instructions; add SSE optimizations for the Shll_V and S/Ushll_V instructions; improve the SSE optimization for the Xtn_V instruction. Add tests. (#543)
* Update Optimizations.cs
* Update InstEmitSimdShift.cs
* Update InstEmitSimdHelper.cs
* Update InstEmitSimdArithmetic.cs
* Update InstEmitSimdMove.cs
* Update SoftFloat.cs
* Update InstEmitSimdCmp.cs
* Update CpuTestSimdShImm.cs
* Update CpuTestSimd.cs
* Update CpuTestSimdReg.cs
* Nit.
* Update SoftFloat.cs
* Update InstEmitSimdArithmetic.cs
* Update InstEmitSimdHelper.cs
* Update CpuTestSimd.cs
* Explicit some implicit casts.
* Simplify some powers; nits.
* Update OpCodeTable.cs
* Update InstEmitSimdArithmetic.cs
* Update CpuTestSimdReg.cs
* Update InstEmitSimdArithmetic.cs
Diffstat (limited to 'ChocolArm64/Instructions/InstEmitSimdArithmetic.cs')
-rw-r--r-- ChocolArm64/Instructions/InstEmitSimdArithmetic.cs 268
1 files changed, 241 insertions, 27 deletions
diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
index 013d0432..d1e71ecb 100644
--- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
@@ -176,12 +176,119 @@ namespace ChocolArm64.Instructions
public static void Fabd_S(ILEmitterCtx context)
{
- EmitScalarBinaryOpF(context, () =>
+ if (Optimizations.FastFP && Optimizations.UseSse2)
{
- context.Emit(OpCodes.Sub);
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
- EmitUnaryMathCall(context, nameof(Math.Abs));
- });
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Type[] typesSsv = new Type[] { typeof(float) };
+ Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+
+ context.EmitLdc_R4(-0f);
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
+
+ context.EmitStvec(op.Rd);
+
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] typesSsv = new Type[] { typeof(double) };
+ Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+
+ context.EmitLdc_R8(-0d);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
+
+ EmitLdvecWithCastToDouble(context, op.Rn);
+ EmitLdvecWithCastToDouble(context, op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
+
+ EmitStvecWithCastFromDouble(context, op.Rd);
+
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub));
+
+ EmitUnaryMathCall(context, nameof(Math.Abs));
+ });
+ }
+ }
+
+ public static void Fabd_V(ILEmitterCtx context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Type[] typesSav = new Type[] { typeof(float) };
+ Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+
+ context.EmitLdc_R4(-0f);
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
+
+ context.EmitStvec(op.Rd);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] typesSav = new Type[] { typeof(double) };
+ Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+
+ context.EmitLdc_R8(-0d);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ EmitLdvecWithCastToDouble(context, op.Rn);
+ EmitLdvecWithCastToDouble(context, op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
+
+ EmitStvecWithCastFromDouble(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub));
+
+ EmitUnaryMathCall(context, nameof(Math.Abs));
+ });
+ }
}
public static void Fabs_S(ILEmitterCtx context)
@@ -321,17 +428,60 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
- EmitVectorExtractF(context, op.Rn, 0, sizeF);
- EmitVectorExtractF(context, op.Rn, 1, sizeF);
+ if (Optimizations.FastFP && Optimizations.UseSse3)
+ {
+ if (sizeF == 0)
+ {
+ Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
- context.Emit(OpCodes.Add);
+ context.EmitLdvec(op.Rn);
+ context.Emit(OpCodes.Dup);
- EmitScalarSetF(context, op.Rd, sizeF);
+ context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
+
+ context.EmitStvec(op.Rd);
+
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+
+ EmitLdvecWithCastToDouble(context, op.Rn);
+ context.Emit(OpCodes.Dup);
+
+ context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
+
+ EmitStvecWithCastFromDouble(context, op.Rd);
+
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorExtractF(context, op.Rn, 0, sizeF);
+ EmitVectorExtractF(context, op.Rn, 1, sizeF);
+
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd));
+
+ EmitScalarSetF(context, op.Rd, sizeF);
+ }
}
public static void Faddp_V(ILEmitterCtx context)
{
- EmitVectorPairwiseOpF(context, () => context.Emit(OpCodes.Add));
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
+ {
+ EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Add));
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd));
+ });
+ }
}
public static void Fdiv_S(ILEmitterCtx context)
@@ -462,10 +612,18 @@ namespace ChocolArm64.Instructions
public static void Fmaxp_V(ILEmitterCtx context)
{
- EmitVectorPairwiseOpF(context, () =>
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax));
- });
+ EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Max));
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax));
+ });
+ }
}
public static void Fmin_S(ILEmitterCtx context)
@@ -518,10 +676,18 @@ namespace ChocolArm64.Instructions
public static void Fminp_V(ILEmitterCtx context)
{
- EmitVectorPairwiseOpF(context, () =>
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin));
- });
+ EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Min));
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin));
+ });
+ }
}
public static void Fmla_Se(ILEmitterCtx context)
@@ -1085,18 +1251,42 @@ namespace ChocolArm64.Instructions
public static void Frecpe_S(ILEmitterCtx context)
{
- EmitScalarUnaryOpF(context, () =>
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && sizeF == 0)
{
- EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate));
- });
+ EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalScalar));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate));
+ });
+ }
}
public static void Frecpe_V(ILEmitterCtx context)
{
- EmitVectorUnaryOpF(context, () =>
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && sizeF == 0)
{
- EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate));
- });
+ EmitVectorSseOrSse2OpF(context, nameof(Sse.Reciprocal));
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate));
+ });
+ }
}
public static void Frecps_S(ILEmitterCtx context) // Fused.
@@ -1398,18 +1588,42 @@ namespace ChocolArm64.Instructions
public static void Frsqrte_S(ILEmitterCtx context)
{
- EmitScalarUnaryOpF(context, () =>
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && sizeF == 0)
{
- EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate));
- });
+ EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrtScalar));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate));
+ });
+ }
}
public static void Frsqrte_V(ILEmitterCtx context)
{
- EmitVectorUnaryOpF(context, () =>
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && sizeF == 0)
{
- EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate));
- });
+ EmitVectorSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrt));
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate));
+ });
+ }
}
public static void Frsqrts_S(ILEmitterCtx context) // Fused.