about | summary | refs | log | tree | commit | diff
path: root/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
diff options
context:
space:
mode:
author: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2019-03-13 09:23:52 +0100
committer: jduncanator <1518948+jduncanator@users.noreply.github.com> 2019-03-13 19:23:52 +1100
commit: 1bef70c068f8aeb6a3a518b8ca635de19122da14 (patch)
tree: 84d3ead95523f4803de1e6288f38ad45d6039005 /ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
parent: a0aecd1ff85437890bb6a86fcc71fc90e80a4d24 (diff)
Add Rshrn_V & Shrn_V Sse opt.. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614)
* Update CountLeadingZeros().
* Remove obsolete Tests.
* Follow-up.
* Follow-up.
* Follow-up.
* Add Mla_V, Mls_V & Mul_V Tests.
* Update PackageReferences.
* Remove EmitLd/Stvectmp2().
* Remove Dup. Nits.
* Remove EmitLd/Stvectmp2() & Dup; nits.
* Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn().
* Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits.
* Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt.. Remove Tmp stuff; remove Dup. Nits.
* Add Mla/Mls/Mul_V Sse opt.. Add "Part" opt.. Remove EmitLd/Stvectmp2(), remove Dup. Nits.
* Nits.
* Nits.
* Nit.
* Add "Part" opt.. Nit.
* Nit.
* Nit.
* Add Cmhi_V & Cmhs_V Sse opt..
Diffstat (limited to 'ChocolArm64/Instructions/InstEmitSimdArithmetic.cs')
-rw-r--r-- ChocolArm64/Instructions/InstEmitSimdArithmetic.cs 407
1 files changed, 268 insertions, 139 deletions
diff --git a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
index f7236e9a..5ceea774 100644
--- a/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdArithmetic.cs
@@ -1,4 +1,5 @@
// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+// https://www.agner.org/optimize/#vectorclass @ vectori128.h
using ChocolArm64.Decoders;
using ChocolArm64.State;
@@ -184,8 +185,8 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
- Type[] typesSsv = new Type[] { typeof(float) };
- Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+ Type[] typesSsv = new Type[] { typeof(float) };
+ Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
@@ -193,8 +194,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAndNot));
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SubtractScalar), typesSubAnt));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
@@ -202,8 +203,8 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
- Type[] typesSsv = new Type[] { typeof(double) };
- Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+ Type[] typesSsv = new Type[] { typeof(double) };
+ Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
@@ -211,8 +212,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAndNot));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SubtractScalar), typesSubAnt));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
@@ -240,8 +241,8 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
- Type[] typesSav = new Type[] { typeof(float) };
- Type[] typesSubAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+ Type[] typesSav = new Type[] { typeof(float) };
+ Type[] typesSubAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
@@ -249,8 +250,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAndNot));
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAndNot));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesSubAnt));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
@@ -261,8 +262,8 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
- Type[] typesSav = new Type[] { typeof(double) };
- Type[] typesSubAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+ Type[] typesSav = new Type[] { typeof(double) };
+ Type[] typesSubAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
@@ -270,8 +271,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAndNot));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesSubAnt));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesSubAnt));
context.EmitStvec(op.Rd);
}
@@ -295,15 +296,15 @@ namespace ChocolArm64.Instructions
if (op.Size == 0)
{
- Type[] typesSsv = new Type[] { typeof(float) };
- Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+ Type[] typesSsv = new Type[] { typeof(float) };
+ Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetScalarVector128), typesSsv));
context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
context.EmitStvec(op.Rd);
@@ -311,15 +312,15 @@ namespace ChocolArm64.Instructions
}
else /* if (op.Size == 1) */
{
- Type[] typesSsv = new Type[] { typeof(double) };
- Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+ Type[] typesSsv = new Type[] { typeof(double) };
+ Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetScalarVector128), typesSsv));
context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
context.EmitStvec(op.Rd);
@@ -345,15 +346,15 @@ namespace ChocolArm64.Instructions
if (sizeF == 0)
{
- Type[] typesSav = new Type[] { typeof(float) };
- Type[] typesAndNot = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+ Type[] typesSav = new Type[] { typeof(float) };
+ Type[] typesAnt = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdc_R4(-0f);
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.SetAllVector128), typesSav));
context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AndNot), typesAnt));
context.EmitStvec(op.Rd);
@@ -364,15 +365,15 @@ namespace ChocolArm64.Instructions
}
else /* if (sizeF == 1) */
{
- Type[] typesSav = new Type[] { typeof(double) };
- Type[] typesAndNot = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+ Type[] typesSav = new Type[] { typeof(double) };
+ Type[] typesAnt = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdc_R8(-0d);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
context.EmitLdvec(op.Rn);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAndNot));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), typesAnt));
context.EmitStvec(op.Rd);
}
@@ -429,7 +430,7 @@ namespace ChocolArm64.Instructions
Type[] typesAddH = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
@@ -442,7 +443,7 @@ namespace ChocolArm64.Instructions
Type[] typesAddH = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
+ context.EmitLdvec(op.Rn);
context.EmitCall(typeof(Sse3).GetMethod(nameof(Sse3.HorizontalAdd), typesAddH));
@@ -748,11 +749,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
+
+ context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
+
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Add), typesMulAdd));
@@ -770,11 +773,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
+
+ context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
+
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulAdd));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesMulAdd));
@@ -863,11 +868,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
+
+ context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
+
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMulSub));
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Subtract), typesMulSub));
@@ -885,11 +892,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
+
+ context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
+
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMulSub));
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Subtract), typesMulSub));
@@ -1000,11 +1009,13 @@ namespace ChocolArm64.Instructions
Type[] typesMul = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
context.EmitLdvec(op.Rn);
+
+ context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6);
- context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl));
+
context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Multiply), typesMul));
context.EmitStvec(op.Rd);
@@ -1020,11 +1031,13 @@ namespace ChocolArm64.Instructions
Type[] typesMul = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
context.EmitLdvec(op.Rn);
+
+ context.EmitLdvec(op.Rm);
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
context.EmitLdc_I4(op.Index | op.Index << 1);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Shuffle), typesSfl));
+
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Multiply), typesMul));
context.EmitStvec(op.Rd);
@@ -1772,11 +1785,18 @@ namespace ChocolArm64.Instructions
public static void Mla_V(ILEmitterCtx context)
{
- EmitVectorTernaryOpZx(context, () =>
+ if (Optimizations.UseSse41)
{
- context.Emit(OpCodes.Mul);
- context.Emit(OpCodes.Add);
- });
+ EmitSse41Mul_AddSub(context, nameof(Sse2.Add));
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, () =>
+ {
+ context.Emit(OpCodes.Mul);
+ context.Emit(OpCodes.Add);
+ });
+ }
}
public static void Mla_Ve(ILEmitterCtx context)
@@ -1790,11 +1810,18 @@ namespace ChocolArm64.Instructions
public static void Mls_V(ILEmitterCtx context)
{
- EmitVectorTernaryOpZx(context, () =>
+ if (Optimizations.UseSse41)
{
- context.Emit(OpCodes.Mul);
- context.Emit(OpCodes.Sub);
- });
+ EmitSse41Mul_AddSub(context, nameof(Sse2.Subtract));
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(context, () =>
+ {
+ context.Emit(OpCodes.Mul);
+ context.Emit(OpCodes.Sub);
+ });
+ }
}
public static void Mls_Ve(ILEmitterCtx context)
@@ -1808,7 +1835,14 @@ namespace ChocolArm64.Instructions
public static void Mul_V(ILEmitterCtx context)
{
- EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
+ if (Optimizations.UseSse41)
+ {
+ EmitSse41Mul_AddSub(context);
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(context, () => context.Emit(OpCodes.Mul));
+ }
}
public static void Mul_Ve(ILEmitterCtx context)
@@ -1923,19 +1957,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -1969,13 +2007,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -1999,25 +2038,19 @@ namespace ChocolArm64.Instructions
Type[] typesAndXorAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp2();
-
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
- context.EmitLdvectmp();
- context.EmitLdvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
- context.EmitLdc_I4(1);
+ context.Emit(OpCodes.Ldc_I4_1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
context.EmitStvec(op.Rd);
@@ -2185,20 +2218,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@@ -2244,20 +2281,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@@ -2441,19 +2482,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -2482,13 +2527,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -2594,19 +2640,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -2659,13 +2709,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -2689,25 +2740,19 @@ namespace ChocolArm64.Instructions
Type[] typesAndXorAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn);
-
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
context.EmitLdvec(op.Rm);
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp2();
-
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), typesAndXorAdd));
- context.EmitLdvectmp();
- context.EmitLdvectmp2();
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), typesAndXorAdd));
- context.EmitLdc_I4(1);
+ context.Emit(OpCodes.Ldc_I4_1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAndXorAdd));
context.EmitStvec(op.Rd);
@@ -2737,8 +2782,7 @@ namespace ChocolArm64.Instructions
Type[] typesAvgSub = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], VectorUIntTypesPerSizeLog2[op.Size] };
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
-
+ context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Average), typesAvgSub));
@@ -2862,20 +2906,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@@ -2921,20 +2969,24 @@ namespace ChocolArm64.Instructions
? nameof(Sse41.ConvertToVector128Int16)
: nameof(Sse41.ConvertToVector128Int32);
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(nameCvt, typesCvt));
@@ -3063,19 +3115,23 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -3104,13 +3160,14 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
context.EmitLdvec(op.Rm);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSrl));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -3253,5 +3310,77 @@ namespace ChocolArm64.Instructions
EmitVectorZeroUpper(context, op.Rd);
}
}
+
+ private static void EmitSse41Mul_AddSub(ILEmitterCtx context, string nameAddSub = null)
+ {
+ OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp;
+
+ if (nameAddSub != null)
+ {
+ context.EmitLdvec(op.Rd);
+ }
+
+ if (op.Size == 0)
+ {
+ Type[] typesBle = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) };
+ Type[] typesMul = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) };
+ Type[] typesShs = new Type[] { typeof(Vector128<short>), typeof(byte) };
+ Type[] typesSav = new Type[] { typeof(int) };
+
+ context.EmitLdvec(op.Rn);
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
+
+ context.EmitLdvec(op.Rm);
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
+
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
+
+ context.EmitLdc_I4(0x00FF00FF);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav));
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.BlendVariable), typesBle));
+ }
+ else if (op.Size == 1)
+ {
+ Type[] typesMul = new Type[] { typeof(Vector128<short>), typeof(Vector128<short>) };
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyLow), typesMul));
+ }
+ else /* if (op.Size == 2) */
+ {
+ Type[] typesMul = new Type[] { typeof(Vector128<int>), typeof(Vector128<int>) };
+
+ context.EmitLdvec(op.Rn);
+ context.EmitLdvec(op.Rm);
+
+ context.EmitCall(typeof(Sse41).GetMethod(nameof(Sse41.MultiplyLow), typesMul));
+ }
+
+ if (nameAddSub != null)
+ {
+ Type[] typesAddSub = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameAddSub, typesAddSub));
+ }
+
+ context.EmitStvec(op.Rd);
+
+ if (op.RegisterSize == RegisterSize.Simd64)
+ {
+ EmitVectorZeroUpper(context, op.Rd);
+ }
+ }
}
}