diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2018-11-18 03:41:16 +0100 |
|---|---|---|
| committer | gdkchan <gab.dark.100@gmail.com> | 2018-11-18 00:41:16 -0200 |
| commit | e603b7afbcdff0fc732304872f5a65d410c601f9 (patch) | |
| tree | d1949402bc6c6edd5a3d6e2ea40d9033a3d2f654 /ChocolArm64/Instructions/InstEmitSimdArithmetic.cs | |
| parent | b7613dd4b8a535d028ae180ee3a4b574abe4e3e0 (diff) | |
Add Sse Opt. for S/Umax_V, S/Umin_V, S/Uaddw_V, S/Usubw_V, Fabs_S/V, Fneg_S/V Inst.; for Fcvtl_V, Fcvtn_V Inst.; and for Fcmp_S Inst.. Add/Improve other Sse Opt.. Add Tests. (#496)
* Update CpuTest.cs
* Update CpuTestSimd.cs
* Update CpuTestSimdReg.cs
* Update InstEmitSimdCmp.cs
* Update SoftFloat.cs
* Update InstEmitAluHelper.cs
* Update InstEmitSimdArithmetic.cs
* Update InstEmitSimdHelper.cs
* Update VectorHelper.cs
* Update InstEmitSimdCvt.cs
* Update InstEmitSimdArithmetic.cs
* Update CpuTestSimd.cs
* Update InstEmitSimdArithmetic.cs
* Update OpCodeTable.cs
* Update InstEmitSimdArithmetic.cs
* Update InstEmitSimdCmp.cs
* Update InstEmitSimdCvt.cs
* Update CpuTestSimd.cs
* Update CpuTestSimdReg.cs
* Create CpuTestSimdFcond.cs
* Update OpCodeTable.cs
* Update InstEmitSimdMove.cs
* Update CpuTestSimdIns.cs
* Create CpuTestSimdExt.cs
* Nit.
* Update PackageReference.
Diffstat (limited to 'ChocolArm64/Instructions/InstEmitSimdArithmetic.cs')
| -rw-r--r-- | ChocolArm64/Instructions/InstEmitSimdArithmetic.cs | 748 |
1 files changed, 687 insertions, 61 deletions
diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs index 5668bb64..c05e9f94 100644 --- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs @@ -186,18 +186,101 @@ namespace ChocolArm64.Instructions public static void Fabs_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => + if (Optimizations.UseSse2) { - EmitUnaryMathCall(context, nameof(Math.Abs)); - }); + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (op.Size == 0) + { + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (op.Size == 1) */ + { + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } } public static void Fabs_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => + if (Optimizations.UseSse2) { - EmitUnaryMathCall(context, nameof(Math.Abs)); - }); + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpF(context, () => + { + EmitUnaryMathCall(context, nameof(Math.Abs)); + }); + } } public static void Fadd_S(ILEmitterCtx context) @@ -283,7 +366,7 @@ namespace ChocolArm64.Instructions } } - public static void Fmadd_S(ILEmitterCtx context) + public static void Fmadd_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -450,22 +533,118 @@ namespace ChocolArm64.Instructions }); } - public static void Fmla_V(ILEmitterCtx context) + public static void Fmla_V(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Add); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd)); + }); + } } - public static void Fmla_Ve(ILEmitterCtx context) + public static void Fmla_Ve(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpByElemF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Add); - }); + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) }; + Type[] typesMulAdd = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdvec(op.Rd); + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) }; + Type[] typesMulAdd = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd)); + }); + } } public static void Fmls_Se(ILEmitterCtx context) @@ -477,25 +656,121 @@ namespace ChocolArm64.Instructions }); } - public static void Fmls_V(ILEmitterCtx context) + public static void Fmls_V(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Sub); - }); + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdvec(op.Rd); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + EmitLdvecWithCastToDouble(context, op.Rn); + EmitLdvecWithCastToDouble(context, op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub)); + }); + } } - public static void Fmls_Ve(ILEmitterCtx context) + public static void Fmls_Ve(ILEmitterCtx context) // Fused. { - EmitVectorTernaryOpByElemF(context, () => + if (Optimizations.FastFP && Optimizations.UseSse2) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Sub); - }); + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdvec(op.Rd); + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) }; + Type[] typesMulSub = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + EmitLdvecWithCastToDouble(context, op.Rd); + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorTernaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub)); + }); + } } - public static void Fmsub_S(ILEmitterCtx context) + public static void Fmsub_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -580,7 +855,59 @@ namespace ChocolArm64.Instructions public static void Fmul_Ve(ILEmitterCtx context) { - EmitVectorBinaryOpByElemF(context, () => context.Emit(OpCodes.Mul)); + if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF64 op = (OpCodeSimdRegElemF64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSfl = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>), typeof(byte) }; + Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSfl = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>), typeof(byte) }; + Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + EmitLdvecWithCastToDouble(context, op.Rn); + + EmitLdvecWithCastToDouble(context, op.Rm); + context.Emit(OpCodes.Dup); + + context.EmitLdc_I4(op.Index | op.Index << 1); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorBinaryOpByElemF(context, () => + { + EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul)); + }); + } } public static void Fmulx_S(ILEmitterCtx context) @@ -617,12 +944,95 @@ namespace ChocolArm64.Instructions public static void Fneg_S(ILEmitterCtx context) { - EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + if (Optimizations.UseSse2) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + if (op.Size == 0) + { + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesXor = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor)); + + context.EmitStvec(op.Rd); + + EmitVectorZero32_128(context, op.Rd); + } + else /* if (op.Size == 1) */ + { + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesXor = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor)); + + EmitStvecWithCastFromDouble(context, op.Rd); + + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + EmitScalarUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + } } public static void Fneg_V(ILEmitterCtx context) { - EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + if (Optimizations.UseSse2) + { + OpCodeSimd64 op = (OpCodeSimd64)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesXor = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + + context.EmitLdc_R4(-0f); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); + + context.EmitLdvec(op.Rn); + + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Xor), typesXor)); + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else /* if (sizeF == 1) */ + { + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesXor = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + + context.EmitLdc_R8(-0d); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + EmitLdvecWithCastToDouble(context, op.Rn); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesXor)); + + EmitStvecWithCastFromDouble(context, op.Rd); + } + } + else + { + EmitVectorUnaryOpF(context, () => context.Emit(OpCodes.Neg)); + } } public static void Fnmadd_S(ILEmitterCtx context) @@ -689,7 +1099,7 @@ namespace ChocolArm64.Instructions }); } - public static void Frecps_S(ILEmitterCtx context) + public static void Frecps_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -743,7 +1153,7 @@ namespace ChocolArm64.Instructions } } - public static void Frecps_V(ILEmitterCtx context) + public static void Frecps_V(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -986,7 +1396,7 @@ namespace ChocolArm64.Instructions }); } - public static void Frsqrts_S(ILEmitterCtx context) + public static void Frsqrts_S(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -1048,7 +1458,7 @@ namespace ChocolArm64.Instructions } } - public static void Frsqrts_V(ILEmitterCtx context) + public static void Frsqrts_V(ILEmitterCtx context) // Fused. { if (Optimizations.FastFP && Optimizations.UseSse2) { @@ -1310,7 +1720,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -1334,7 +1744,38 @@ namespace ChocolArm64.Instructions public static void Saddw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Add)); + } } public static void Shadd_V(ILEmitterCtx context) @@ -1439,11 +1880,34 @@ namespace ChocolArm64.Instructions public static void Smax_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(long), typeof(long) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + Type[] typesMax = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); - EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + } } public static void Smaxp_V(ILEmitterCtx context) @@ -1457,11 +1921,34 @@ namespace ChocolArm64.Instructions public static void Smin_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(long), typeof(long) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + Type[] typesMin = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + Type typeSse = op.Size == 1 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(long), typeof(long) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); - EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + EmitVectorBinaryOpSx(context, () => context.EmitCall(mthdInfo)); + } } public static void Sminp_V(ILEmitterCtx context) @@ -1484,7 +1971,7 @@ namespace ChocolArm64.Instructions Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], VectorIntTypesPerSizeLog2[op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -1508,7 +1995,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); @@ -1535,7 +2022,7 @@ namespace ChocolArm64.Instructions Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], VectorIntTypesPerSizeLog2[op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -1559,7 +2046,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); @@ -1735,7 +2222,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); context.EmitLdc_I4(numBytes); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); @@ -1754,7 +2241,38 @@ namespace ChocolArm64.Instructions public static void Ssubw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size + 1], + VectorIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithSignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithSignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, () => context.Emit(OpCodes.Sub)); + } } public static void Sub_S(ILEmitterCtx context) @@ -1901,7 +2419,38 @@ namespace ChocolArm64.Instructions public static void Uaddw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Add)); + } } public static void Uhadd_V(ILEmitterCtx context) @@ -1992,11 +2541,34 @@ namespace ChocolArm64.Instructions public static void Umax_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + Type[] typesMax = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Max), typesMax)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; - EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Max), types); + + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } } public static void Umaxp_V(ILEmitterCtx context) @@ -2010,11 +2582,34 @@ namespace ChocolArm64.Instructions public static void Umin_V(ILEmitterCtx context) { - Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); + Type[] typesMin = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; + + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeSse.GetMethod(nameof(Sse2.Min), typesMin)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } + else + { + Type[] types = new Type[] { typeof(ulong), typeof(ulong) }; + + MethodInfo mthdInfo = typeof(Math).GetMethod(nameof(Math.Min), types); - EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + EmitVectorBinaryOpZx(context, () => context.EmitCall(mthdInfo)); + } } public static void Uminp_V(ILEmitterCtx context) @@ -2037,7 +2632,7 @@ namespace ChocolArm64.Instructions Type[] typesMulAdd = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], VectorIntTypesPerSizeLog2 [op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -2061,7 +2656,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); @@ -2088,7 +2683,7 @@ namespace ChocolArm64.Instructions Type[] typesMulSub = new Type[] { VectorIntTypesPerSizeLog2 [op.Size + 1], VectorIntTypesPerSizeLog2 [op.Size + 1] }; - Type typeMul = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); + Type typeSse = op.Size == 0 ? typeof(Sse2) : typeof(Sse41); string nameCvt = op.Size == 0 ? nameof(Sse41.ConvertToVector128Int16) @@ -2112,7 +2707,7 @@ namespace ChocolArm64.Instructions context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); - context.EmitCall(typeMul.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); + context.EmitCall(typeSse.GetMethod(nameof(Sse2.MultiplyLow), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); @@ -2251,7 +2846,38 @@ namespace ChocolArm64.Instructions public static void Usubw_V(ILEmitterCtx context) { - EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + if (Optimizations.UseSse41) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; + Type[] typesCvt = new Type[] { VectorUIntTypesPerSizeLog2[op.Size] }; + Type[] typesSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], + VectorUIntTypesPerSizeLog2[op.Size + 1] }; + + string[] namesCvt = new string[] { nameof(Sse41.ConvertToVector128Int16), + nameof(Sse41.ConvertToVector128Int32), + nameof(Sse41.ConvertToVector128Int64) }; + + int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; + + EmitLdvecWithUnsignedCast(context, op.Rn, op.Size + 1); + + EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); + + context.EmitLdc_I4(numBytes); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + + context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSub)); + + EmitStvecWithUnsignedCast(context, op.Rd, op.Size + 1); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, () => context.Emit(OpCodes.Sub)); + } } private static void EmitAbs(ILEmitterCtx context) |
