aboutsummaryrefslogtreecommitdiff
path: root/ChocolArm64
diff options
context:
space:
mode:
author LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2018-12-26 18:11:36 +0100
committer gdkchan <gab.dark.100@gmail.com> 2018-12-26 15:11:36 -0200
commit 0f5b6dfbe8d4bcc4df3f670e366a967d8ea103db (patch)
tree 89fe781d39e9e02534fd455a26008db8a3a14341 /ChocolArm64
parent d8f2497f155046402cd15c65eca0326faf3aefd6 (diff)
Fix Frecpe_S/V and Frsqrte_S/V (full FP emulation). Add SSE optimizations and SoftFloat implementations for Fcmeq/ge/gt/le/lt_S/V (Reg & Zero), Faddp_S/V, Fmaxp_V and Fminp_V; add SSE optimizations for Shll_V and S/Ushll_V; improve the SSE optimization for Xtn_V. Add tests. (#543)
* Update Optimizations.cs * Update InstEmitSimdShift.cs * Update InstEmitSimdHelper.cs * Update InstEmitSimdArithmetic.cs * Update InstEmitSimdMove.cs * Update SoftFloat.cs * Update InstEmitSimdCmp.cs * Update CpuTestSimdShImm.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Nit. * Update SoftFloat.cs * Update InstEmitSimdArithmetic.cs * Update InstEmitSimdHelper.cs * Update CpuTestSimd.cs * Explicit some implicit casts. * Simplify some powers; nits. * Update OpCodeTable.cs * Update InstEmitSimdArithmetic.cs * Update CpuTestSimdReg.cs * Update InstEmitSimdArithmetic.cs
Diffstat (limited to 'ChocolArm64')
-rw-r--r-- ChocolArm64/Instructions/InstEmitSimdArithmetic.cs 268
-rw-r--r-- ChocolArm64/Instructions/InstEmitSimdCmp.cs 282
-rw-r--r-- ChocolArm64/Instructions/InstEmitSimdHelper.cs 110
-rw-r--r-- ChocolArm64/Instructions/InstEmitSimdMove.cs 80
-rw-r--r-- ChocolArm64/Instructions/InstEmitSimdShift.cs 98
-rw-r--r-- ChocolArm64/Instructions/SoftFloat.cs 701
-rw-r--r-- ChocolArm64/OpCodeTable.cs 1
-rw-r--r-- ChocolArm64/Optimizations.cs 2
8 files changed, 1177 insertions, 365 deletions
diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
index 013d0432..d1e71ecb 100644
--- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
@@ -176,12 +176,119 @@ namespace ChocolArm64.Instructions
public static void Fabd_S(ILEmitterCtx context)
{
- EmitScalarBinaryOpF(context, () =>
+ if (Optimizations.FastFP && Optimizations.UseSse2)
{
- context.Emit(OpCodes.Sub);
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
- EmitUnaryMathCall(context, nameof(Math.Abs));
- });
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Type[] typesSsv = new Type[] { typeof(float) };
+ Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+
+ context.EmitLdc_R4(-0f);
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
+
+ context.EmitStvec(op.Rd);
+
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] typesSsv = new Type[] { typeof(double) };
+ Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+
+ context.EmitLdc_R8(-0d);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
+
+ EmitLdvecWithCastToDouble(context, op.Rn);
+ EmitLdvecWithCastToDouble(context, op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
+
+ EmitStvecWithCastFromDouble(context, op.Rd);
+
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitScalarBinaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub));
+
+ EmitUnaryMathCall(context, nameof(Math.Abs));
+ });
+ }
+ }
+
+ public static void Fabd_V(ILEmitterCtx context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ Type[] typesSav = new Type[] { typeof(float) };
+ Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+
+ context.EmitLdc_R4(-0f);
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
+
+ context.EmitStvec(op.Rd);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] typesSav = new Type[] { typeof(double) };
+ Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+
+ context.EmitLdc_R8(-0d);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ EmitLdvecWithCastToDouble(context, op.Rn);
+ EmitLdvecWithCastToDouble(context, op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
+
+ EmitStvecWithCastFromDouble(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub));
+
+ EmitUnaryMathCall(context, nameof(Math.Abs));
+ });
+ }
}
public static void Fabs_S(ILEmitterCtx context)
@@ -321,17 +428,60 @@ namespace ChocolArm64.Instructions
int sizeF = op.Size & 1;
- EmitVectorExtractF(context, op.Rn, 0, sizeF);
- EmitVectorExtractF(context, op.Rn, 1, sizeF);
+ if (Optimizations.FastFP && Optimizations.UseSse3)
+ {
+ if (sizeF == 0)
+ {
+ Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
- context.Emit(OpCodes.Add);
+ context.EmitLdvec(op.Rn);
+ context.Emit(OpCodes.Dup);
- EmitScalarSetF(context, op.Rd, sizeF);
+ context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
+
+ context.EmitStvec(op.Rd);
+
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+
+ EmitLdvecWithCastToDouble(context, op.Rn);
+ context.Emit(OpCodes.Dup);
+
+ context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
+
+ EmitStvecWithCastFromDouble(context, op.Rd);
+
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorExtractF(context, op.Rn, 0, sizeF);
+ EmitVectorExtractF(context, op.Rn, 1, sizeF);
+
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd));
+
+ EmitScalarSetF(context, op.Rd, sizeF);
+ }
}
public static void Faddp_V(ILEmitterCtx context)
{
- EmitVectorPairwiseOpF(context, () => context.Emit(OpCodes.Add));
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
+ {
+ EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Add));
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd));
+ });
+ }
}
public static void Fdiv_S(ILEmitterCtx context)
@@ -462,10 +612,18 @@ namespace ChocolArm64.Instructions
public static void Fmaxp_V(ILEmitterCtx context)
{
- EmitVectorPairwiseOpF(context, () =>
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax));
- });
+ EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Max));
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax));
+ });
+ }
}
public static void Fmin_S(ILEmitterCtx context)
@@ -518,10 +676,18 @@ namespace ChocolArm64.Instructions
public static void Fminp_V(ILEmitterCtx context)
{
- EmitVectorPairwiseOpF(context, () =>
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin));
- });
+ EmitVectorPairwiseSseOrSse2OpF(context, nameof(Sse.Min));
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin));
+ });
+ }
}
public static void Fmla_Se(ILEmitterCtx context)
@@ -1085,18 +1251,42 @@ namespace ChocolArm64.Instructions
public static void Frecpe_S(ILEmitterCtx context)
{
- EmitScalarUnaryOpF(context, () =>
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && sizeF == 0)
{
- EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate));
- });
+ EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalScalar));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate));
+ });
+ }
}
public static void Frecpe_V(ILEmitterCtx context)
{
- EmitVectorUnaryOpF(context, () =>
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && sizeF == 0)
{
- EmitUnarySoftFloatCall(context, nameof(SoftFloat.RecipEstimate));
- });
+ EmitVectorSseOrSse2OpF(context, nameof(Sse.Reciprocal));
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate));
+ });
+ }
}
public static void Frecps_S(ILEmitterCtx context) // Fused.
@@ -1398,18 +1588,42 @@ namespace ChocolArm64.Instructions
public static void Frsqrte_S(ILEmitterCtx context)
{
- EmitScalarUnaryOpF(context, () =>
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && sizeF == 0)
{
- EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate));
- });
+ EmitScalarSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrtScalar));
+ }
+ else
+ {
+ EmitScalarUnaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate));
+ });
+ }
}
public static void Frsqrte_V(ILEmitterCtx context)
{
- EmitVectorUnaryOpF(context, () =>
+ OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && sizeF == 0)
{
- EmitUnarySoftFloatCall(context, nameof(SoftFloat.InvSqrtEstimate));
- });
+ EmitVectorSseOrSse2OpF(context, nameof(Sse.ReciprocalSqrt));
+ }
+ else
+ {
+ EmitVectorUnaryOpF(context, () =>
+ {
+ EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate));
+ });
+ }
}
public static void Frsqrts_S(ILEmitterCtx context) // Fused.
diff --git a/ChocolArm64/Instructions/InstEmitSimdCmp.cs b/ChocolArm64/Instructions/InstEmitSimdCmp.cs
index 3ee25482..e1184375 100644
--- a/ChocolArm64/Instructions/InstEmitSimdCmp.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdCmp.cs
@@ -15,7 +15,7 @@ namespace ChocolArm64.Instructions
{
public static void Cmeq_S(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Beq_S, scalar: true);
+ EmitCmpOp(context, OpCodes.Beq_S, scalar: true);
}
public static void Cmeq_V(ILEmitterCtx context)
@@ -32,28 +32,28 @@ namespace ChocolArm64.Instructions
}
else
{
- EmitCmp(context, OpCodes.Beq_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Beq_S, scalar: false);
}
}
else
{
- EmitCmp(context, OpCodes.Beq_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Beq_S, scalar: false);
}
}
public static void Cmge_S(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Bge_S, scalar: true);
+ EmitCmpOp(context, OpCodes.Bge_S, scalar: true);
}
public static void Cmge_V(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Bge_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Bge_S, scalar: false);
}
public static void Cmgt_S(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Bgt_S, scalar: true);
+ EmitCmpOp(context, OpCodes.Bgt_S, scalar: true);
}
public static void Cmgt_V(ILEmitterCtx context)
@@ -70,63 +70,63 @@ namespace ChocolArm64.Instructions
}
else
{
- EmitCmp(context, OpCodes.Bgt_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Bgt_S, scalar: false);
}
}
else
{
- EmitCmp(context, OpCodes.Bgt_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Bgt_S, scalar: false);
}
}
public static void Cmhi_S(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Bgt_Un_S, scalar: true);
+ EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: true);
}
public static void Cmhi_V(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Bgt_Un_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Bgt_Un_S, scalar: false);
}
public static void Cmhs_S(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Bge_Un_S, scalar: true);
+ EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: true);
}
public static void Cmhs_V(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Bge_Un_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Bge_Un_S, scalar: false);
}
public static void Cmle_S(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Ble_S, scalar: true);
+ EmitCmpOp(context, OpCodes.Ble_S, scalar: true);
}
public static void Cmle_V(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Ble_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Ble_S, scalar: false);
}
public static void Cmlt_S(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Blt_S, scalar: true);
+ EmitCmpOp(context, OpCodes.Blt_S, scalar: true);
}
public static void Cmlt_V(ILEmitterCtx context)
{
- EmitCmp(context, OpCodes.Blt_S, scalar: false);
+ EmitCmpOp(context, OpCodes.Blt_S, scalar: false);
}
public static void Cmtst_S(ILEmitterCtx context)
{
- EmitCmtst(context, scalar: true);
+ EmitCmtstOp(context, scalar: true);
}
public static void Cmtst_V(ILEmitterCtx context)
{
- EmitCmtst(context, scalar: false);
+ EmitCmtstOp(context, scalar: false);
}
public static void Fccmp_S(ILEmitterCtx context)
@@ -145,7 +145,7 @@ namespace ChocolArm64.Instructions
context.MarkLabel(lblTrue);
- EmitFcmpE(context, signalNaNs: false);
+ EmitFcmpOrFcmpe(context, signalNaNs: false);
context.MarkLabel(lblEnd);
}
@@ -166,120 +166,152 @@ namespace ChocolArm64.Instructions
context.MarkLabel(lblTrue);
- EmitFcmpE(context, signalNaNs: true);
+ EmitFcmpOrFcmpe(context, signalNaNs: true);
context.MarkLabel(lblEnd);
}
public static void Fcmeq_S(ILEmitterCtx context)
{
- if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
- && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar));
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareEqualScalar), scalar: true);
}
else
{
- EmitScalarFcmp(context, OpCodes.Beq_S);
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: true);
}
}
public static void Fcmeq_V(ILEmitterCtx context)
{
- if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
- && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareEqual));
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareEqual), scalar: false);
}
else
{
- EmitVectorFcmp(context, OpCodes.Beq_S);
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: false);
}
}
public static void Fcmge_S(ILEmitterCtx context)
{
- if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
- && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar));
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar), scalar: true);
}
else
{
- EmitScalarFcmp(context, OpCodes.Bge_S);
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true);
}
}
public static void Fcmge_V(ILEmitterCtx context)
{
- if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
- && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual));
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual), scalar: false);
}
else
{
- EmitVectorFcmp(context, OpCodes.Bge_S);
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false);
}
}
public static void Fcmgt_S(ILEmitterCtx context)
{
- if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
- && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitScalarSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar));
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar), scalar: true);
}
else
{
- EmitScalarFcmp(context, OpCodes.Bgt_S);
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true);
}
}
public static void Fcmgt_V(ILEmitterCtx context)
{
- if (context.CurrOp is OpCodeSimdReg64 && Optimizations.UseSse
- && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
{
- EmitVectorSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan));
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan), scalar: false);
}
else
{
- EmitVectorFcmp(context, OpCodes.Bgt_S);
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false);
}
}
public static void Fcmle_S(ILEmitterCtx context)
{
- EmitScalarFcmp(context, OpCodes.Ble_S);
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
+ {
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqualScalar), scalar: true, isLeOrLt: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: true);
+ }
}
public static void Fcmle_V(ILEmitterCtx context)
{
- EmitVectorFcmp(context, OpCodes.Ble_S);
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
+ {
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanOrEqual), scalar: false, isLeOrLt: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: false);
+ }
}
public static void Fcmlt_S(ILEmitterCtx context)
{
- EmitScalarFcmp(context, OpCodes.Blt_S);
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
+ {
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThanScalar), scalar: true, isLeOrLt: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: true);
+ }
}
public static void Fcmlt_V(ILEmitterCtx context)
{
- EmitVectorFcmp(context, OpCodes.Blt_S);
+ if (Optimizations.FastFP && Optimizations.UseSse
+ && Optimizations.UseSse2)
+ {
+ EmitCmpSseOrSse2OpF(context, nameof(Sse.CompareGreaterThan), scalar: false, isLeOrLt: true);
+ }
+ else
+ {
+ EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: false);
+ }
}
public static void Fcmp_S(ILEmitterCtx context)
{
- EmitFcmpE(context, signalNaNs: false);
+ EmitFcmpOrFcmpe(context, signalNaNs: false);
}
public static void Fcmpe_S(ILEmitterCtx context)
{
- EmitFcmpE(context, signalNaNs: true);
+ EmitFcmpOrFcmpe(context, signalNaNs: true);
}
- private static void EmitFcmpE(ILEmitterCtx context, bool signalNaNs)
+ private static void EmitFcmpOrFcmpe(ILEmitterCtx context, bool signalNaNs)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
@@ -430,7 +462,7 @@ namespace ChocolArm64.Instructions
{
context.EmitLdc_R4(0f);
}
- else // if (op.Size == 1)
+ else /* if (op.Size == 1) */
{
context.EmitLdc_R8(0d);
}
@@ -448,7 +480,7 @@ namespace ChocolArm64.Instructions
}
}
- private static void EmitCmp(ILEmitterCtx context, OpCode ilOp, bool scalar)
+ private static void EmitCmpOp(ILEmitterCtx context, OpCode ilOp, bool scalar)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
@@ -492,7 +524,7 @@ namespace ChocolArm64.Instructions
}
}
- private static void EmitCmtst(ILEmitterCtx context, bool scalar)
+ private static void EmitCmtstOp(ILEmitterCtx context, bool scalar)
{
OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
@@ -532,84 +564,134 @@ namespace ChocolArm64.Instructions
}
}
- private static void EmitScalarFcmp(ILEmitterCtx context, OpCode ilOp)
- {
- EmitFcmp(context, ilOp, 0, scalar: true);
- }
-
- private static void EmitVectorFcmp(ILEmitterCtx context, OpCode ilOp)
+ private static void EmitCmpOpF(ILEmitterCtx context, string name, bool scalar)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
int bytes = op.GetBitsCount() >> 3;
- int elems = bytes >> sizeF + 2;
+ int elems = !scalar ? bytes >> sizeF + 2 : 1;
for (int index = 0; index < elems; index++)
{
- EmitFcmp(context, ilOp, index, scalar: false);
+ EmitVectorExtractF(context, op.Rn, index, sizeF);
+
+ if (op is OpCodeSimdReg64 binOp)
+ {
+ EmitVectorExtractF(context, binOp.Rm, index, sizeF);
+ }
+ else
+ {
+ if (sizeF == 0)
+ {
+ context.EmitLdc_R4(0f);
+ }
+ else /* if (sizeF == 1) */
+ {
+ context.EmitLdc_R8(0d);
+ }
+ }
+
+ EmitSoftFloatCall(context, name);
+
+ EmitVectorInsertF(context, op.Rd, index, sizeF);
}
- if (op.RegisterSize == RegisterSize.Simd64)
+ if (!scalar)
{
- EmitVectorZeroUpper(context, op.Rd);
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
+ else
+ {
+ if (sizeF == 0)
+ {
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else /* if (sizeF == 1) */
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
}
}
- private static void EmitFcmp(ILEmitterCtx context, OpCode ilOp, int index, bool scalar)
+ private static void EmitCmpSseOrSse2OpF(ILEmitterCtx context, string name, bool scalar, bool isLeOrLt = false)
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
int sizeF = op.Size & 1;
- ulong szMask = ulong.MaxValue >> (64 - (32 << sizeF));
+ if (sizeF == 0)
+ {
+ Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
- EmitVectorExtractF(context, op.Rn, index, sizeF);
+ if (!isLeOrLt)
+ {
+ context.EmitLdvec(op.Rn);
+ }
- if (op is OpCodeSimdReg64 binOp)
- {
- EmitVectorExtractF(context, binOp.Rm, index, sizeF);
- }
- else if (sizeF == 0)
- {
- context.EmitLdc_R4(0f);
- }
- else /* if (sizeF == 1) */
- {
- context.EmitLdc_R8(0d);
- }
+ if (op is OpCodeSimdReg64 binOp)
+ {
+ context.EmitLdvec(binOp.Rm);
+ }
+ else
+ {
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+ }
- ILLabel lblTrue = new ILLabel();
- ILLabel lblEnd = new ILLabel();
+ if (isLeOrLt)
+ {
+ context.EmitLdvec(op.Rn);
+ }
- context.Emit(ilOp, lblTrue);
+ context.EmitCall(typeof(Sse).GetMethod(name, types));
- if (scalar)
- {
- EmitVectorZeroAll(context, op.Rd);
+ context.EmitStvec(op.Rd);
+
+ if (scalar)
+ {
+ EmitVectorZero32_128(context, op.Rd);
+ }
+ else if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
}
- else
+ else /* if (sizeF == 1) */
{
- EmitVectorInsert(context, op.Rd, index, sizeF + 2, 0);
- }
+ Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
- context.Emit(OpCodes.Br_S, lblEnd);
+ if (!isLeOrLt)
+ {
+ EmitLdvecWithCastToDouble(context, op.Rn);
+ }
- context.MarkLabel(lblTrue);
+ if (op is OpCodeSimdReg64 binOp)
+ {
+ EmitLdvecWithCastToDouble(context, binOp.Rm);
+ }
+ else
+ {
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorDoubleZero));
+ }
- if (scalar)
- {
- EmitVectorInsert(context, op.Rd, index, 3, (long)szMask);
+ if (isLeOrLt)
+ {
+ EmitLdvecWithCastToDouble(context, op.Rn);
+ }
- EmitVectorZeroUpper(context, op.Rd);
- }
- else
- {
- EmitVectorInsert(context, op.Rd, index, sizeF + 2, (long)szMask);
- }
+ context.EmitCall(typeof(Sse2).GetMethod(name, types));
- context.MarkLabel(lblEnd);
+ EmitStvecWithCastFromDouble(context, op.Rd);
+
+ if (scalar)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
}
}
}
diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs
index 7b597be3..cea481a6 100644
--- a/ChocolArm64/Instructions/InstEmitSimdHelper.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs
@@ -322,26 +322,6 @@ namespace ChocolArm64.Instructions
context.EmitCall(mthdInfo);
}
- public static void EmitUnarySoftFloatCall(ILEmitterCtx context, string name)
- {
- IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;
-
- int sizeF = op.Size & 1;
-
- MethodInfo mthdInfo;
-
- if (sizeF == 0)
- {
- mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(float) });
- }
- else /* if (sizeF == 1) */
- {
- mthdInfo = typeof(SoftFloat).GetMethod(name, new Type[] { typeof(double) });
- }
-
- context.EmitCall(mthdInfo);
- }
-
public static void EmitSoftFloatCall(ILEmitterCtx context, string name)
{
IOpCodeSimd64 op = (IOpCodeSimd64)context.CurrOp;
@@ -909,6 +889,96 @@ namespace ChocolArm64.Instructions
}
}
+ public static void EmitVectorPairwiseSseOrSse2OpF(ILEmitterCtx context, string name)
+ {
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ int sizeF = op.Size & 1;
+
+ if (sizeF == 0)
+ {
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.UnpackLow), types));
+
+ context.Emit(OpCodes.Dup);
+ context.EmitStvectmp();
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), types));
+
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
+
+ context.EmitLdvectmp();
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveHighToLow), types));
+
+ context.EmitCall(typeof(Sse).GetMethod(name, types));
+
+ context.EmitStvec(op.Rd);
+ }
+ else /* if (op.RegisterSize == RegisterSize.Simd128) */
+ {
+ Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) };
+ Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+
+ context.EmitLdvec(op.Rn);
+
+ context.Emit(OpCodes.Dup);
+ context.EmitStvectmp();
+
+ context.EmitLdvec(op.Rm);
+
+ context.Emit(OpCodes.Dup);
+ context.EmitStvectmp2();
+
+ context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0);
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
+
+ context.EmitLdvectmp();
+ context.EmitLdvectmp2();
+
+ context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0);
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
+
+ context.EmitCall(typeof(Sse).GetMethod(name, types));
+
+ context.EmitStvec(op.Rd);
+ }
+ }
+ else /* if (sizeF == 1) */
+ {
+ Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+
+ EmitLdvecWithCastToDouble(context, op.Rn);
+
+ context.Emit(OpCodes.Dup);
+ context.EmitStvectmp();
+
+ EmitLdvecWithCastToDouble(context, op.Rm);
+
+ context.Emit(OpCodes.Dup);
+ context.EmitStvectmp2();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types));
+
+ context.EmitLdvectmp();
+ context.EmitLdvectmp2();
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types));
+
+ context.EmitCall(typeof(Sse2).GetMethod(name, types));
+
+ EmitStvecWithCastFromDouble(context, op.Rd);
+ }
+ }
+
[Flags]
public enum SaturatingFlags
{
diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs
index 0d9aa312..d40ccff9 100644
--- a/ChocolArm64/Instructions/InstEmitSimdMove.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs
@@ -377,75 +377,47 @@ namespace ChocolArm64.Instructions
{
OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp;
- int elems = 8 >> op.Size;
-
- int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
-
- if (Optimizations.UseSse41 && op.Size < 2)
+ if (Optimizations.UseSsse3)
{
- void EmitZeroVector()
- {
- switch (op.Size)
- {
- case 0: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt16Zero)); break;
- case 1: VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt32Zero)); break;
- }
- }
-
- //For XTN, first operand is source, second operand is 0.
- //For XTN2, first operand is 0, second operand is source.
- if (part != 0)
+ long[] masks = new long[]
{
- EmitZeroVector();
- }
-
- EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1);
-
- //Set mask to discard the upper half of the wide elements.
- switch (op.Size)
- {
- case 0: context.EmitLdc_I4(0x00ff); break;
- case 1: context.EmitLdc_I4(0x0000ffff); break;
- }
-
- Type wideType = IntTypesPerSizeLog2[op.Size + 1];
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0,
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0,
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0
+ };
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { wideType }));
+ Type[] typesMov = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+ Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
+ Type[] typesSve = new Type[] { typeof(long), typeof(long) };
- wideType = VectorIntTypesPerSizeLog2[op.Size + 1];
+ string nameMov = op.RegisterSize == RegisterSize.Simd128
+ ? nameof(Sse.MoveLowToHigh)
+ : nameof(Sse.MoveHighToLow);
- Type[] wideTypes = new Type[] { wideType, wideType };
+ context.EmitLdvec(op.Rd);
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), wideTypes));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov));
- if (part == 0)
- {
- EmitZeroVector();
- }
+ EmitLdvecWithSignedCast(context, op.Rn, 0);
- //Pack values with signed saturation, the signed saturation shouldn't
- //saturate anything since the upper bits were masked off.
- Type sseType = op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
-
- context.EmitCall(sseType.GetMethod(nameof(Sse2.PackUnsignedSaturate), wideTypes));
-
- if (part != 0)
- {
- //For XTN2, we additionally need to discard the upper bits
- //of the target register and OR the result with it.
- EmitVectorZeroUpper(context, op.Rd);
+ context.EmitLdc_I8(masks[op.Size]);
+ context.Emit(OpCodes.Dup);
- EmitLdvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve));
- Type narrowType = VectorUIntTypesPerSizeLog2[op.Size];
+ context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { narrowType, narrowType }));
- }
+ context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov));
- EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
+ context.EmitStvec(op.Rd);
}
else
{
+ int elems = 8 >> op.Size;
+
+ int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
+
if (part != 0)
{
context.EmitLdvec(op.Rd);
diff --git a/ChocolArm64/Instructions/InstEmitSimdShift.cs b/ChocolArm64/Instructions/InstEmitSimdShift.cs
index 5b606167..84305211 100644
--- a/ChocolArm64/Instructions/InstEmitSimdShift.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdShift.cs
@@ -22,9 +22,11 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
+ int shift = GetImmShl(op);
+
EmitScalarUnaryOpZx(context, () =>
{
- context.EmitLdc_I4(GetImmShl(op));
+ context.EmitLdc_I4(shift);
context.Emit(OpCodes.Shl);
});
@@ -34,13 +36,15 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
+ int shift = GetImmShl(op);
+
if (Optimizations.UseSse2 && op.Size > 0)
{
Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) };
EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
- context.EmitLdc_I4(GetImmShl(op));
+ context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
EmitStvecWithUnsignedCast(context, op.Rd, op.Size);
@@ -54,7 +58,7 @@ namespace ChocolArm64.Instructions
{
EmitVectorUnaryOpZx(context, () =>
{
- context.EmitLdc_I4(GetImmShl(op));
+ context.EmitLdc_I4(shift);
context.Emit(OpCodes.Shl);
});
@@ -67,7 +71,33 @@ namespace ChocolArm64.Instructions
int shift = 8 << op.Size;
- EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);
+ if (Optimizations.UseSse41)
+ {
+ Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
+ Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
+
+ string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
+ nameof(Sse41.ConvertToVector128Int32),
+ nameof(Sse41.ConvertToVector128Int64) };
+
+ int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+
+ EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
+
+ context.EmitLdc_I4(numBytes);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+
+ context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
+
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+
+ EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);
+ }
}
public static void Shrn_V(ILEmitterCtx context)
@@ -362,7 +392,35 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op));
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseSse41)
+ {
+ Type[] typesSll = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
+ Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] };
+
+ string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
+ nameof(Sse41.ConvertToVector128Int32),
+ nameof(Sse41.ConvertToVector128Int64) };
+
+ int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+
+ EmitLdvecWithSignedCast(context, op.Rn, op.Size);
+
+ context.EmitLdc_I4(numBytes);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+
+ context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
+
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+
+ EmitStvecWithSignedCast(context, op.Rd, op.Size + 1);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinarySx(context, () => context.Emit(OpCodes.Shl), shift);
+ }
}
public static void Sshr_S(ILEmitterCtx context)
@@ -663,7 +721,35 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), GetImmShl(op));
+ int shift = GetImmShl(op);
+
+ if (Optimizations.UseSse41)
+ {
+ Type[] typesSll = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) };
+ Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] };
+
+ string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16),
+ nameof(Sse41.ConvertToVector128Int32),
+ nameof(Sse41.ConvertToVector128Int64) };
+
+ int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
+
+ EmitLdvecWithUnsignedCast(context, op.Rn, op.Size);
+
+ context.EmitLdc_I4(numBytes);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+
+ context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
+
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+
+ EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1);
+ }
+ else
+ {
+ EmitVectorShImmWidenBinaryZx(context, () => context.Emit(OpCodes.Shl), shift);
+ }
}
public static void Ushr_S(ILEmitterCtx context)
diff --git a/ChocolArm64/Instructions/SoftFloat.cs b/ChocolArm64/Instructions/SoftFloat.cs
index 2af8afbd..39d279de 100644
--- a/ChocolArm64/Instructions/SoftFloat.cs
+++ b/ChocolArm64/Instructions/SoftFloat.cs
@@ -9,191 +9,72 @@ namespace ChocolArm64.Instructions
{
static SoftFloat()
{
- RecipEstimateTable = BuildRecipEstimateTable();
- InvSqrtEstimateTable = BuildInvSqrtEstimateTable();
+ RecipEstimateTable = BuildRecipEstimateTable();
+ RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable();
}
- private static readonly byte[] RecipEstimateTable;
- private static readonly byte[] InvSqrtEstimateTable;
+ internal static readonly byte[] RecipEstimateTable;
+ internal static readonly byte[] RecipSqrtEstimateTable;
private static byte[] BuildRecipEstimateTable()
{
- byte[] table = new byte[256];
- for (ulong index = 0; index < 256; index++)
+ byte[] tbl = new byte[256];
+
+ for (int idx = 0; idx < 256; idx++)
{
- ulong a = index | 0x100;
+ uint src = (uint)idx + 256u;
- a = (a << 1) + 1;
- ulong b = 0x80000 / a;
- b = (b + 1) >> 1;
+ Debug.Assert(256u <= src && src < 512u);
- table[index] = (byte)(b & 0xFF);
- }
- return table;
- }
+ src = (src << 1) + 1u;
- private static byte[] BuildInvSqrtEstimateTable()
- {
- byte[] table = new byte[512];
- for (ulong index = 128; index < 512; index++)
- {
- ulong a = index;
- if (a < 256)
- {
- a = (a << 1) + 1;
- }
- else
- {
- a = (a | 1) << 1;
- }
+ uint aux = (1u << 19) / src;
- ulong b = 256;
- while (a * (b + 1) * (b + 1) < (1ul << 28))
- {
- b++;
- }
- b = (b + 1) >> 1;
+ uint dst = (aux + 1u) >> 1;
+
+ Debug.Assert(256u <= dst && dst < 512u);
- table[index] = (byte)(b & 0xFF);
+ tbl[idx] = (byte)(dst - 256u);
}
- return table;
- }
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static float RecipEstimate(float x)
- {
- return (float)RecipEstimate((double)x);
+ return tbl;
}
- public static double RecipEstimate(double x)
+ private static byte[] BuildRecipSqrtEstimateTable()
{
- ulong xBits = (ulong)BitConverter.DoubleToInt64Bits(x);
- ulong xSign = xBits & 0x8000000000000000;
- ulong xExp = (xBits >> 52) & 0x7FF;
- ulong scaled = xBits & ((1ul << 52) - 1);
+ byte[] tbl = new byte[384];
- if (xExp >= 2045)
+ for (int idx = 0; idx < 384; idx++)
{
- if (xExp == 0x7ff && scaled != 0)
- {
- // NaN
- return BitConverter.Int64BitsToDouble((long)(xBits | 0x0008000000000000));
- }
+ uint src = (uint)idx + 128u;
- // Infinity, or Out of range -> Zero
- return BitConverter.Int64BitsToDouble((long)xSign);
- }
+ Debug.Assert(128u <= src && src < 512u);
- if (xExp == 0)
- {
- if (scaled == 0)
+ if (src < 256u)
{
- // Zero -> Infinity
- return BitConverter.Int64BitsToDouble((long)(xSign | 0x7FF0000000000000));
- }
-
- // Denormal
- if ((scaled & (1ul << 51)) == 0)
- {
- xExp = ~0ul;
- scaled <<= 2;
+ src = (src << 1) + 1u;
}
else
{
- scaled <<= 1;
+ src = (src >> 1) << 1;
+ src = (src + 1u) << 1;
}
- }
-
- scaled >>= 44;
- scaled &= 0xFF;
- ulong resultExp = (2045 - xExp) & 0x7FF;
- ulong estimate = (ulong)RecipEstimateTable[scaled];
- ulong fraction = estimate << 44;
+ uint aux = 512u;
- if (resultExp == 0)
- {
- fraction >>= 1;
- fraction |= 1ul << 51;
- }
- else if (resultExp == 0x7FF)
- {
- resultExp = 0;
- fraction >>= 2;
- fraction |= 1ul << 50;
- }
-
- ulong result = xSign | (resultExp << 52) | fraction;
- return BitConverter.Int64BitsToDouble((long)result);
- }
-
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static float InvSqrtEstimate(float x)
- {
- return (float)InvSqrtEstimate((double)x);
- }
-
- public static double InvSqrtEstimate(double x)
- {
- ulong xBits = (ulong)BitConverter.DoubleToInt64Bits(x);
- ulong xSign = xBits & 0x8000000000000000;
- long xExp = (long)((xBits >> 52) & 0x7FF);
- ulong scaled = xBits & ((1ul << 52) - 1);
-
- if (xExp == 0x7FF && scaled != 0)
- {
- // NaN
- return BitConverter.Int64BitsToDouble((long)(xBits | 0x0008000000000000));
- }
-
- if (xExp == 0)
- {
- if (scaled == 0)
- {
- // Zero -> Infinity
- return BitConverter.Int64BitsToDouble((long)(xSign | 0x7FF0000000000000));
- }
-
- // Denormal
- while ((scaled & (1 << 51)) == 0)
+ while (src * (aux + 1u) * (aux + 1u) < (1u << 28))
{
- scaled <<= 1;
- xExp--;
+ aux = aux + 1u;
}
- scaled <<= 1;
- }
- if (xSign != 0)
- {
- // Negative -> NaN
- return BitConverter.Int64BitsToDouble((long)0x7FF8000000000000);
- }
+ uint dst = (aux + 1u) >> 1;
- if (xExp == 0x7ff && scaled == 0)
- {
- // Infinity -> Zero
- return BitConverter.Int64BitsToDouble((long)xSign);
- }
+ Debug.Assert(256u <= dst && dst < 512u);
- if (((ulong)xExp & 1) == 1)
- {
- scaled >>= 45;
- scaled &= 0xFF;
- scaled |= 0x80;
+ tbl[idx] = (byte)(dst - 256u);
}
- else
- {
- scaled >>= 44;
- scaled &= 0xFF;
- scaled |= 0x100;
- }
-
- ulong resultExp = ((ulong)(3068 - xExp) / 2) & 0x7FF;
- ulong estimate = (ulong)InvSqrtEstimateTable[scaled];
- ulong fraction = estimate << 44;
- ulong result = xSign | (resultExp << 52) | fraction;
- return BitConverter.Int64BitsToDouble((long)result);
+ return tbl;
}
}
@@ -395,12 +276,12 @@ namespace ChocolArm64.Instructions
{
intMant++;
- if (intMant == (uint)Math.Pow(2d, f))
+ if (intMant == 1u << f)
{
biasedExp = 1u;
}
- if (intMant == (uint)Math.Pow(2d, f + 1))
+ if (intMant == 1u << (f + 1))
{
biasedExp++;
intMant >>= 1;
@@ -409,7 +290,7 @@ namespace ChocolArm64.Instructions
float result;
- if (biasedExp >= (uint)Math.Pow(2d, e) - 1u)
+ if (biasedExp >= (1u << e) - 1u)
{
result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
@@ -666,12 +547,12 @@ namespace ChocolArm64.Instructions
{
intMant++;
- if (intMant == (uint)Math.Pow(2d, f))
+ if (intMant == 1u << f)
{
biasedExp = 1u;
}
- if (intMant == (uint)Math.Pow(2d, f + 1))
+ if (intMant == 1u << (f + 1))
{
biasedExp++;
intMant >>= 1;
@@ -682,7 +563,7 @@ namespace ChocolArm64.Instructions
if (!state.GetFpcrFlag(Fpcr.Ahp))
{
- if (biasedExp >= (uint)Math.Pow(2d, e) - 1u)
+ if (biasedExp >= (1u << e) - 1u)
{
resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
@@ -697,7 +578,7 @@ namespace ChocolArm64.Instructions
}
else
{
- if (biasedExp >= (uint)Math.Pow(2d, e))
+ if (biasedExp >= 1u << e)
{
resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu);
@@ -826,6 +707,94 @@ namespace ChocolArm64.Instructions
return result;
}
+ public static float FPCompareEQ(float value1, float value2, CpuThreadState state) // Equality compare; returns a bit mask: all ones when equal, all zeros otherwise.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value1 = value1.FPUnpack(out FpType type1, out _, out _, state); // Keep the unpacked value and its class; sign and raw bits are discarded.
+ value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
+
+ float result;
+
+ if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) // Any NaN operand: the compare is false.
+ {
+ result = ZerosOrOnes(false);
+
+ if (type1 == FpType.SNaN || type2 == FpType.SNaN) // Only a signaling NaN reports InvalidOp here; quiet NaNs do not.
+ {
+ FPProcessException(FpExc.InvalidOp, state);
+ }
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 == value2);
+ }
+
+ return result;
+ }
+
+ public static float FPCompareGE(float value1, float value2, CpuThreadState state) // value1 >= value2 compare; returns a bit mask: all ones when true, all zeros otherwise.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value1 = value1.FPUnpack(out FpType type1, out _, out _, state); // Keep the unpacked value and its class; sign and raw bits are discarded.
+ value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
+
+ float result;
+
+ if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) // Any NaN operand: the compare is false.
+ {
+ result = ZerosOrOnes(false);
+
+ FPProcessException(FpExc.InvalidOp, state); // Unlike FPCompareEQ, quiet NaNs also report InvalidOp.
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 >= value2);
+ }
+
+ return result;
+ }
+
+ public static float FPCompareGT(float value1, float value2, CpuThreadState state) // value1 > value2 compare; returns a bit mask: all ones when true, all zeros otherwise.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value1 = value1.FPUnpack(out FpType type1, out _, out _, state); // Keep the unpacked value and its class; sign and raw bits are discarded.
+ value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
+
+ float result;
+
+ if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) // Any NaN operand: the compare is false.
+ {
+ result = ZerosOrOnes(false);
+
+ FPProcessException(FpExc.InvalidOp, state); // Quiet and signaling NaNs both report InvalidOp.
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 > value2);
+ }
+
+ return result;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float FPCompareLE(float value1, float value2, CpuThreadState state) // value1 <= value2 compare, returned as the same all-ones/all-zeros mask as FPCompareGE.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ return FPCompareGE(value2, value1, state); // Operands are swapped: value2 >= value1 is value1 <= value2.
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float FPCompareLT(float value1, float value2, CpuThreadState state) // value1 < value2 compare, returned as the same all-ones/all-zeros mask as FPCompareGT.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ return FPCompareGT(value2, value1, state); // Operands are swapped: value2 > value1 is value1 < value2.
+ }
+
public static float FPDiv(float value1, float value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -1188,6 +1157,95 @@ namespace ChocolArm64.Instructions
return result;
}
+ public static float FPRecipEstimate(float value, CpuThreadState state) // Table-driven reciprocal estimate of value; special inputs are handled before the table lookup.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value.FPUnpack(out FpType type, out bool sign, out uint op, state); // Return value is not used; `value` stays as passed in, only type/sign/raw bits are taken.
+
+ float result;
+
+ if (type == FpType.SNaN || type == FpType.QNaN)
+ {
+ result = FPProcessNaN(type, op, state);
+ }
+ else if (type == FpType.Infinity) // Infinity -> zero of the same sign.
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FpType.Zero) // Zero -> infinity of the same sign, reported as DivideByZero.
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FpExc.DivideByZero, state);
+ }
+ else if (MathF.Abs(value) < MathF.Pow(2f, -128)) // |value| below 2^-128 is handled as an overflowing result.
+ {
+ bool overflowToInf;
+
+ switch (state.FPRoundingMode()) // Rounding mode decides between infinity and the largest finite value.
+ {
+ default:
+ case RoundMode.ToNearest: overflowToInf = true; break;
+ case RoundMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case RoundMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case RoundMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FpExc.Overflow, state);
+ FPProcessException(FpExc.Inexact, state);
+ }
+ else if (state.GetFpcrFlag(Fpcr.Fz) && (MathF.Abs(value) >= MathF.Pow(2f, 126))) // Fz flag set and |value| >= 2^126: signed zero, and Fpsr.Ufc is set directly.
+ {
+ result = FPZero(sign);
+
+ state.SetFpsrFlag(Fpsr.Ufc);
+ }
+ else
+ {
+ ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; // 23-bit fraction moved up to bits 51..29 of a 52-bit field.
+ uint exp = (op & 0x7F800000u) >> 23; // 8-bit exponent field.
+
+ if (exp == 0u) // Zero exponent field: shift the fraction up by one or two bits.
+ {
+ if ((fraction & 0x0008000000000000ul) == 0ul) // Bit 51 clear: shift by two.
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ exp -= 1u; // Wraps to 0xFFFFFFFF (unchecked uint arithmetic assumed); used modulo 2^32 below.
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); // Top 8 fraction bits with bit 8 forced on: range 256..511.
+
+ uint resultExp = 253u - exp; // May be 0 or wrap to 0xFFFFFFFF; both cases are handled below.
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; // Table index is 0..255; the stored byte is offset back by 256.
+
+ fraction = (ulong)(estimate & 0xFFu) << 44; // Low 8 estimate bits become fraction bits 51..44.
+
+ if (resultExp == 0u) // Zero result exponent: set bit 52 and shift right by one.
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u) // resultExp wrapped to 0xFFFFFFFF: set bit 52, shift right by two, exponent forced to zero.
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int32BitsToSingle( // Pack: sign at bit 31, low 8 exponent bits at 30..23, fraction bits 51..29 at 22..0.
+ (int)((sign ? 1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu));
+ }
+
+ return result;
+ }
+
public static float FPRecipStepFused(float value1, float value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -1255,6 +1313,71 @@ namespace ChocolArm64.Instructions
return result;
}
+ public static float FPRSqrtEstimate(float value, CpuThreadState state) // Table-driven reciprocal square root estimate of value; special inputs are handled before the table lookup.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value.FPUnpack(out FpType type, out bool sign, out uint op, state); // Return value is not used; only type/sign/raw bits are taken.
+
+ float result;
+
+ if (type == FpType.SNaN || type == FpType.QNaN)
+ {
+ result = FPProcessNaN(type, op, state);
+ }
+ else if (type == FpType.Zero) // Zero -> infinity of the same sign, reported as DivideByZero.
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FpExc.DivideByZero, state);
+ }
+ else if (sign) // Any remaining negative input (including -infinity, checked before the Infinity case): default NaN plus InvalidOp.
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FpExc.InvalidOp, state);
+ }
+ else if (type == FpType.Infinity) // Only +infinity reaches here -> +0.
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; // 23-bit fraction moved up to bits 51..29 of a 52-bit field.
+ uint exp = (op & 0x7F800000u) >> 23; // 8-bit exponent field.
+
+ if (exp == 0u) // Zero exponent field: normalize the fraction.
+ {
+ while ((fraction & 0x0008000000000000ul) == 0ul) // Shift left until bit 51 is set, decrementing exp per step (uint wraparound, unchecked arithmetic assumed).
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; // Final shift: the mask clears the leading bit at position 51 before shifting.
+ }
+
+ uint scaled;
+
+ if ((exp & 1u) == 0u) // Even exponent: 8 fraction bits plus bit 8 forced on, range 256..511.
+ {
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else // Odd exponent: 7 fraction bits plus bit 7 forced on, range 128..255.
+ {
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (380u - exp) >> 1; // Modulo-2^32 subtraction, so a wrapped exp still gives the intended difference.
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; // Table index is 0..383; the stored byte is offset back by 256.
+
+ result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15)); // Sign bit left clear; low 8 estimate bits land in fraction bits 22..15.
+ }
+
+ return result;
+ }
+
public static float FPRSqrtStepFused(float value1, float value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat32.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -1402,6 +1525,11 @@ namespace ChocolArm64.Instructions
return sign ? -0f : +0f;
}
+ private static float FPMaxNormal(bool sign) // Returns float.MinValue when sign is true, float.MaxValue otherwise.
+ {
+ return sign ? float.MinValue : float.MaxValue;
+ }
+
private static float FPTwo(bool sign)
{
return sign ? -2f : +2f;
@@ -1417,6 +1545,11 @@ namespace ChocolArm64.Instructions
return -value;
}
+ private static float ZerosOrOnes(bool zeros) // Builds a compare-result mask. NOTE(review): the parameter name is inverted - true yields all-one bits, false yields all-zero bits.
+ {
+ return BitConverter.Int32BitsToSingle(!zeros ? 0 : -1); // -1 is the 32-bit pattern with every bit set.
+ }
+
private static float FPUnpack(
this float value,
out FpType type,
@@ -1658,6 +1791,94 @@ namespace ChocolArm64.Instructions
return result;
}
+ public static double FPCompareEQ(double value1, double value2, CpuThreadState state) // Equality compare; returns a bit mask: all ones when equal, all zeros otherwise.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareEQ: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value1 = value1.FPUnpack(out FpType type1, out _, out _, state); // Keep the unpacked value and its class; sign and raw bits are discarded.
+ value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
+
+ double result;
+
+ if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) // Any NaN operand: the compare is false.
+ {
+ result = ZerosOrOnes(false);
+
+ if (type1 == FpType.SNaN || type2 == FpType.SNaN) // Only a signaling NaN reports InvalidOp here; quiet NaNs do not.
+ {
+ FPProcessException(FpExc.InvalidOp, state);
+ }
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 == value2);
+ }
+
+ return result;
+ }
+
+ public static double FPCompareGE(double value1, double value2, CpuThreadState state) // value1 >= value2 compare; returns a bit mask: all ones when true, all zeros otherwise.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGE: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value1 = value1.FPUnpack(out FpType type1, out _, out _, state); // Keep the unpacked value and its class; sign and raw bits are discarded.
+ value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
+
+ double result;
+
+ if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) // Any NaN operand: the compare is false.
+ {
+ result = ZerosOrOnes(false);
+
+ FPProcessException(FpExc.InvalidOp, state); // Unlike FPCompareEQ, quiet NaNs also report InvalidOp.
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 >= value2);
+ }
+
+ return result;
+ }
+
+ public static double FPCompareGT(double value1, double value2, CpuThreadState state) // value1 > value2 compare; returns a bit mask: all ones when true, all zeros otherwise.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareGT: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value1 = value1.FPUnpack(out FpType type1, out _, out _, state); // Keep the unpacked value and its class; sign and raw bits are discarded.
+ value2 = value2.FPUnpack(out FpType type2, out _, out _, state);
+
+ double result;
+
+ if (type1 == FpType.SNaN || type1 == FpType.QNaN || type2 == FpType.SNaN || type2 == FpType.QNaN) // Any NaN operand: the compare is false.
+ {
+ result = ZerosOrOnes(false);
+
+ FPProcessException(FpExc.InvalidOp, state); // Quiet and signaling NaNs both report InvalidOp.
+ }
+ else
+ {
+ result = ZerosOrOnes(value1 > value2);
+ }
+
+ return result;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double FPCompareLE(double value1, double value2, CpuThreadState state) // value1 <= value2 compare, returned as the same all-ones/all-zeros mask as FPCompareGE.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLE: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ return FPCompareGE(value2, value1, state); // Operands are swapped: value2 >= value1 is value1 <= value2.
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static double FPCompareLT(double value1, double value2, CpuThreadState state) // value1 < value2 compare, returned as the same all-ones/all-zeros mask as FPCompareGT.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPCompareLT: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ return FPCompareGT(value2, value1, state); // Operands are swapped: value2 > value1 is value1 < value2.
+ }
+
public static double FPDiv(double value1, double value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPDiv: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -2020,6 +2241,95 @@ namespace ChocolArm64.Instructions
return result;
}
+ public static double FPRecipEstimate(double value, CpuThreadState state) // Table-driven reciprocal estimate of value; special inputs are handled before the table lookup.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value.FPUnpack(out FpType type, out bool sign, out ulong op, state); // Return value is not used; `value` stays as passed in, only type/sign/raw bits are taken.
+
+ double result;
+
+ if (type == FpType.SNaN || type == FpType.QNaN)
+ {
+ result = FPProcessNaN(type, op, state);
+ }
+ else if (type == FpType.Infinity) // Infinity -> zero of the same sign.
+ {
+ result = FPZero(sign);
+ }
+ else if (type == FpType.Zero) // Zero -> infinity of the same sign, reported as DivideByZero.
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FpExc.DivideByZero, state);
+ }
+ else if (Math.Abs(value) < Math.Pow(2d, -1024)) // |value| below 2^-1024 is handled as an overflowing result.
+ {
+ bool overflowToInf;
+
+ switch (state.FPRoundingMode()) // Rounding mode decides between infinity and the largest finite value.
+ {
+ default:
+ case RoundMode.ToNearest: overflowToInf = true; break;
+ case RoundMode.TowardsPlusInfinity: overflowToInf = !sign; break;
+ case RoundMode.TowardsMinusInfinity: overflowToInf = sign; break;
+ case RoundMode.TowardsZero: overflowToInf = false; break;
+ }
+
+ result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign);
+
+ FPProcessException(FpExc.Overflow, state);
+ FPProcessException(FpExc.Inexact, state);
+ }
+ else if (state.GetFpcrFlag(Fpcr.Fz) && (Math.Abs(value) >= Math.Pow(2d, 1022))) // Fz flag set and |value| >= 2^1022: signed zero, and Fpsr.Ufc is set directly.
+ {
+ result = FPZero(sign);
+
+ state.SetFpsrFlag(Fpsr.Ufc);
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul; // 52-bit fraction field.
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); // 11-bit exponent field.
+
+ if (exp == 0u) // Zero exponent field: shift the fraction up by one or two bits.
+ {
+ if ((fraction & 0x0008000000000000ul) == 0ul) // Bit 51 clear: shift by two.
+ {
+ fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2;
+ exp -= 1u; // Wraps to 0xFFFFFFFF (unchecked uint arithmetic assumed); used modulo 2^32 below.
+ }
+ else
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ }
+ }
+
+ uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); // Top 8 fraction bits with bit 8 forced on: range 256..511.
+
+ uint resultExp = 2045u - exp; // May be 0 or wrap to 0xFFFFFFFF; both cases are handled below.
+
+ uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; // Table index is 0..255; the stored byte is offset back by 256.
+
+ fraction = (ulong)(estimate & 0xFFu) << 44; // Low 8 estimate bits become fraction bits 51..44.
+
+ if (resultExp == 0u) // Zero result exponent: set bit 52 and shift right by one.
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1;
+ }
+ else if (resultExp + 1u == 0u) // resultExp wrapped to 0xFFFFFFFF: set bit 52, shift right by two, exponent forced to zero.
+ {
+ fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2;
+ resultExp = 0u;
+ }
+
+ result = BitConverter.Int64BitsToDouble( // Pack: sign at bit 63, low 11 exponent bits at 62..52, fraction at 51..0.
+ (long)((sign ? 1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul)));
+ }
+
+ return result;
+ }
+
public static double FPRecipStepFused(double value1, double value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRecipStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -2087,6 +2397,71 @@ namespace ChocolArm64.Instructions
return result;
}
+ public static double FPRSqrtEstimate(double value, CpuThreadState state) // Table-driven reciprocal square root estimate of value; special inputs are handled before the table lookup.
+ {
+ Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtEstimate: state.Fpcr = 0x{state.Fpcr:X8}");
+
+ value.FPUnpack(out FpType type, out bool sign, out ulong op, state); // Return value is not used; only type/sign/raw bits are taken.
+
+ double result;
+
+ if (type == FpType.SNaN || type == FpType.QNaN)
+ {
+ result = FPProcessNaN(type, op, state);
+ }
+ else if (type == FpType.Zero) // Zero -> infinity of the same sign, reported as DivideByZero.
+ {
+ result = FPInfinity(sign);
+
+ FPProcessException(FpExc.DivideByZero, state);
+ }
+ else if (sign) // Any remaining negative input (including -infinity, checked before the Infinity case): default NaN plus InvalidOp.
+ {
+ result = FPDefaultNaN();
+
+ FPProcessException(FpExc.InvalidOp, state);
+ }
+ else if (type == FpType.Infinity) // Only +infinity reaches here -> +0.
+ {
+ result = FPZero(false);
+ }
+ else
+ {
+ ulong fraction = op & 0x000FFFFFFFFFFFFFul; // 52-bit fraction field.
+ uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); // 11-bit exponent field.
+
+ if (exp == 0u) // Zero exponent field: normalize the fraction.
+ {
+ while ((fraction & 0x0008000000000000ul) == 0ul) // Shift left until bit 51 is set, decrementing exp per step (uint wraparound, unchecked arithmetic assumed).
+ {
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1;
+ exp -= 1u;
+ }
+
+ fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; // Final shift: the mask clears the leading bit at position 51 before shifting.
+ }
+
+ uint scaled;
+
+ if ((exp & 1u) == 0u) // Even exponent: 8 fraction bits plus bit 8 forced on, range 256..511.
+ {
+ scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44);
+ }
+ else // Odd exponent: 7 fraction bits plus bit 7 forced on, range 128..255.
+ {
+ scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45);
+ }
+
+ uint resultExp = (3068u - exp) >> 1; // Modulo-2^32 subtraction, so a wrapped exp still gives the intended difference.
+
+ uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; // Table index is 0..383; the stored byte is offset back by 256.
+
+ result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44)); // Sign bit left clear; low 8 estimate bits land in fraction bits 51..44.
+ }
+
+ return result;
+ }
+
public static double FPRSqrtStepFused(double value1, double value2, CpuThreadState state)
{
Debug.WriteLineIf(state.Fpcr != 0, $"SoftFloat64.FPRSqrtStepFused: state.Fpcr = 0x{state.Fpcr:X8}");
@@ -2234,6 +2609,11 @@ namespace ChocolArm64.Instructions
return sign ? -0d : +0d;
}
+ private static double FPMaxNormal(bool sign) // Returns double.MinValue when sign is true, double.MaxValue otherwise.
+ {
+ return sign ? double.MinValue : double.MaxValue;
+ }
+
private static double FPTwo(bool sign)
{
return sign ? -2d : +2d;
@@ -2249,6 +2629,11 @@ namespace ChocolArm64.Instructions
return -value;
}
+ private static double ZerosOrOnes(bool zeros) // Builds a compare-result mask. NOTE(review): the parameter name is inverted - true yields all-one bits, false yields all-zero bits.
+ {
+ return BitConverter.Int64BitsToDouble(!zeros ? 0L : -1L); // -1L is the 64-bit pattern with every bit set.
+ }
+
private static double FPUnpack(
this double value,
out FpType type,
diff --git a/ChocolArm64/OpCodeTable.cs b/ChocolArm64/OpCodeTable.cs
index 845a48d0..adb71ae7 100644
--- a/ChocolArm64/OpCodeTable.cs
+++ b/ChocolArm64/OpCodeTable.cs
@@ -222,6 +222,7 @@ namespace ChocolArm64
SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstEmit.Eor_V, typeof(OpCodeSimdReg64));
SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstEmit.Ext_V, typeof(OpCodeSimdExt64));
SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstEmit.Fabd_S, typeof(OpCodeSimdReg64));
+ SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstEmit.Fabd_V, typeof(OpCodeSimdReg64));
SetA64("000111100x100000110000xxxxxxxxxx", InstEmit.Fabs_S, typeof(OpCodeSimd64));
SetA64("0>0011101<100000111110xxxxxxxxxx", InstEmit.Fabs_V, typeof(OpCodeSimd64));
SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstEmit.Fadd_S, typeof(OpCodeSimdReg64));
diff --git a/ChocolArm64/Optimizations.cs b/ChocolArm64/Optimizations.cs
index aab5eca7..8fa6f462 100644
--- a/ChocolArm64/Optimizations.cs
+++ b/ChocolArm64/Optimizations.cs
@@ -8,12 +8,14 @@ public static class Optimizations
private static bool _useSseIfAvailable = true;
private static bool _useSse2IfAvailable = true;
+ private static bool _useSse3IfAvailable = true;
private static bool _useSsse3IfAvailable = true;
private static bool _useSse41IfAvailable = true;
private static bool _useSse42IfAvailable = true;
internal static bool UseSse = (_useAllSseIfAvailable && _useSseIfAvailable) && Sse.IsSupported;
internal static bool UseSse2 = (_useAllSseIfAvailable && _useSse2IfAvailable) && Sse2.IsSupported;
+ internal static bool UseSse3 = (_useAllSseIfAvailable && _useSse3IfAvailable) && Sse3.IsSupported;
internal static bool UseSsse3 = (_useAllSseIfAvailable && _useSsse3IfAvailable) && Ssse3.IsSupported;
internal static bool UseSse41 = (_useAllSseIfAvailable && _useSse41IfAvailable) && Sse41.IsSupported;
internal static bool UseSse42 = (_useAllSseIfAvailable && _useSse42IfAvailable) && Sse42.IsSupported;