diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2019-03-13 09:23:52 +0100 |
|---|---|---|
| committer | jduncanator <1518948+jduncanator@users.noreply.github.com> | 2019-03-13 19:23:52 +1100 |
| commit | 1bef70c068f8aeb6a3a518b8ca635de19122da14 (patch) | |
| tree | 84d3ead95523f4803de1e6288f38ad45d6039005 /ChocolArm64/Instructions/InstEmitSimdArithmetic.cs | |
| parent | a0aecd1ff85437890bb6a86fcc71fc90e80a4d24 (diff) | |
Add Rshrn_V & Shrn_V Sse opt.. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614)
* Update CountLeadingZeros().
* Remove obsolete Tests.
* Follow-up.
* Follow-up.
* Follow-up.
* Add Mla_V, Mls_V & Mul_V Tests.
* Update PackageReferences.
* Remove EmitLd/Stvectmp2().
* Remove Dup. Nits.
* Remove EmitLd/Stvectmp2() & Dup; nits.
* Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn().
* Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits.
* Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt..
Remove Tmp stuff; remove Dup.
Nits.
* Add Mla/Mls/Mul_V Sse opt.. Add "Part" opt..
Remove EmitLd/Stvectmp2(), remove Dup.
Nits.
* Nits.
* Nits.
* Nit.
* Add "Part" opt.. Nit.
* Nit.
* Nit.
* Add Cmhi_V & Cmhs_V Sse opt..
Diffstat (limited to 'ChocolArm64/Instructions/InstEmitSimdArithmetic.cs')
| -rw-r--r-- | ChocolArm64/Instructions/InstEmitSimdArithmetic.cs | 407 |
1 files changed, 268 insertions, 139 deletions
diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs index f7236e9a..5ceea774 100644 --- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs @@ -1,4 +1,5 @@ // https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h +// https://www.agner.org/optimize/#vectorclass @ vectori128.h using ChocolArm64.Decoders; using ChocolArm64.State; @@ -184,8 +185,8 @@ namespace ChocolArm64.Instructions if (sizeF == 0) { - Type[] typesSsv = new Type[] { typeof(float) }; - Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; context.EmitLdc_R4(-0f); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); @@ -193,8 +194,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot)); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAnt)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt)); context.EmitStvec(op.Rd); @@ -202,8 +203,8 @@ namespace ChocolArm64.Instructions } else /* if (sizeF == 1) */ { - Type[] typesSsv = new Type[] { typeof(double) }; - Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); @@ -211,8 +212,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAnt)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt)); context.EmitStvec(op.Rd); @@ -240,8 +241,8 @@ namespace ChocolArm64.Instructions if (sizeF == 0) { - Type[] typesSav = new Type[] { typeof(float) }; - Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; context.EmitLdc_R4(-0f); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); @@ -249,8 +250,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot)); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAnt)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt)); context.EmitStvec(op.Rd); @@ -261,8 +262,8 @@ namespace ChocolArm64.Instructions } else /* if (sizeF == 1) */ { - Type[] typesSav = new Type[] { typeof(double) }; - Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); @@ -270,8 +271,8 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAnt)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt)); context.EmitStvec(op.Rd); } @@ -295,15 +296,15 @@ namespace ChocolArm64.Instructions if (op.Size == 0) { - Type[] typesSsv = new Type[] { typeof(float) }; - Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + Type[] typesSsv = new Type[] { typeof(float) }; + Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; context.EmitLdc_R4(-0f); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv)); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt)); context.EmitStvec(op.Rd); @@ -311,15 +312,15 @@ namespace ChocolArm64.Instructions } else /* if (op.Size == 1) */ { - Type[] typesSsv = new Type[] { typeof(double) }; - Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + Type[] typesSsv = new Type[] { typeof(double) }; + Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv)); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); context.EmitStvec(op.Rd); @@ -345,15 +346,15 @@ namespace ChocolArm64.Instructions if (sizeF == 0) { - Type[] typesSav = new Type[] { typeof(float) }; - Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; + Type[] typesSav = new Type[] { typeof(float) }; + Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; context.EmitLdc_R4(-0f); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav)); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt)); context.EmitStvec(op.Rd); @@ -364,15 +365,15 @@ namespace ChocolArm64.Instructions } else /* if (sizeF == 1) */ { - Type[] typesSav = new Type[] { typeof(double) }; - Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; + Type[] typesSav = new Type[] { typeof(double) }; + Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; context.EmitLdc_R8(-0d); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); context.EmitLdvec(op.Rn); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt)); context.EmitStvec(op.Rd); } @@ -429,7 +430,7 @@ namespace ChocolArm64.Instructions Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH)); @@ -442,7 +443,7 @@ namespace ChocolArm64.Instructions Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); + context.EmitLdvec(op.Rn); context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH)); @@ -748,11 +749,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd)); @@ -770,11 +773,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd)); @@ -863,11 +868,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub)); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub)); @@ -885,11 +892,13 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub)); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub)); @@ -1000,11 +1009,13 @@ namespace ChocolArm64.Instructions Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul)); context.EmitStvec(op.Rd); @@ -1020,11 +1031,13 @@ namespace ChocolArm64.Instructions Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; context.EmitLdvec(op.Rn); + + context.EmitLdvec(op.Rm); context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); context.EmitLdc_I4(op.Index | op.Index << 1); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl)); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul)); context.EmitStvec(op.Rd); @@ -1772,11 +1785,18 @@ namespace ChocolArm64.Instructions public static void Mla_V(ILEmitterCtx context) { - EmitVectorTernaryOpZx(context, () => + if (Optimizations.UseSse41) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Add); - }); + EmitSse41Mul_AddSub(context, nameof(Sse2.Add)); + } + else + { + EmitVectorTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Add); + }); + } } public static void Mla_Ve(ILEmitterCtx context) @@ -1790,11 +1810,18 @@ namespace ChocolArm64.Instructions public static void Mls_V(ILEmitterCtx context) { - EmitVectorTernaryOpZx(context, () => + if (Optimizations.UseSse41) { - context.Emit(OpCodes.Mul); - context.Emit(OpCodes.Sub); - }); + EmitSse41Mul_AddSub(context, nameof(Sse2.Subtract)); + } + else + { + EmitVectorTernaryOpZx(context, () => + { + context.Emit(OpCodes.Mul); + context.Emit(OpCodes.Sub); + }); + } } public static void Mls_Ve(ILEmitterCtx context) @@ -1808,7 +1835,14 @@ namespace ChocolArm64.Instructions public static void Mul_V(ILEmitterCtx context) { - EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul)); + if (Optimizations.UseSse41) + { + EmitSse41Mul_AddSub(context); + } + else + { + EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul)); + } } public static void Mul_Ve(ILEmitterCtx context) @@ -1923,19 +1957,23 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -1969,13 +2007,14 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -1999,25 +2038,19 @@ namespace ChocolArm64.Instructions Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; context.EmitLdvec(op.Rn); - - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); - context.EmitLdvectmp(); - context.EmitLdvectmp2(); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); - context.EmitLdc_I4(1); + context.Emit(OpCodes.Ldc_I4_1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); context.EmitStvec(op.Rd); @@ -2185,20 +2218,24 @@ namespace ChocolArm64.Instructions ? nameof(Sse41.ConvertToVector128Int16) : nameof(Sse41.ConvertToVector128Int32); - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); @@ -2244,20 +2281,24 @@ namespace ChocolArm64.Instructions ? nameof(Sse41.ConvertToVector128Int16) : nameof(Sse41.ConvertToVector128Int32); - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); @@ -2441,19 +2482,23 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -2482,13 +2527,14 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -2594,19 +2640,23 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -2659,13 +2709,14 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -2689,25 +2740,19 @@ namespace ChocolArm64.Instructions Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; context.EmitLdvec(op.Rn); - - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd)); - context.EmitLdvectmp(); - context.EmitLdvectmp2(); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd)); - context.EmitLdc_I4(1); + context.Emit(OpCodes.Ldc_I4_1); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd)); context.EmitStvec(op.Rd); @@ -2737,8 +2782,7 @@ namespace ChocolArm64.Instructions Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] }; context.EmitLdvec(op.Rn); - context.Emit(OpCodes.Dup); - + context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub)); @@ -2862,20 +2906,24 @@ namespace ChocolArm64.Instructions ? nameof(Sse41.ConvertToVector128Int16) : nameof(Sse41.ConvertToVector128Int32); - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); @@ -2921,20 +2969,24 @@ namespace ChocolArm64.Instructions ? nameof(Sse41.ConvertToVector128Int16) : nameof(Sse41.ConvertToVector128Int32); - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rd); context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt)); @@ -3063,19 +3115,23 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -3104,13 +3160,14 @@ namespace ChocolArm64.Instructions nameof(Sse41.ConvertToVector128Int32), nameof(Sse41.ConvertToVector128Int64) }; - int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0; - context.EmitLdvec(op.Rn); context.EmitLdvec(op.Rm); - context.EmitLdc_I4(numBytes); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl)); + } context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt)); @@ -3253,5 +3310,77 @@ namespace ChocolArm64.Instructions EmitVectorZeroUpper(context, op.Rd); } } + + private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null) + { + OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; + + if (nameAddSub != null) + { + context.EmitLdvec(op.Rd); + } + + if (op.Size == 0) + { + Type[] typesBle = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) }; + Type[] typesMul = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) }; + Type[] typesShs = new Type[] { typeof(Vector128<short>), typeof(byte) }; + Type[] typesSav = new Type[] { typeof(int) }; + + context.EmitLdvec(op.Rn); + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitLdvec(op.Rm); + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs)); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul)); + + context.Emit(OpCodes.Ldc_I4_8); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs)); + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul)); + + context.EmitLdc_I4(0x00FF00FF); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); + + context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.BlendVariable), typesBle)); + } + else if (op.Size == 1) + { + Type[] typesMul = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) }; + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul)); + } + else /* if (op.Size == 2) */ + { + Type[] typesMul = new Type[] { typeof(Vector128<int>), typeof(Vector128<int>) }; + + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); + + context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.MultiplyLow), typesMul)); + } + + if (nameAddSub != null) + { + Type[] typesAddSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] }; + + context.EmitCall(typeof(Sse2).GetMethod(nameAddSub, typesAddSub)); + } + + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } + } } } |
