about | summary | refs | log | tree | commit | diff
path: root/ChocolArm64/Instructions/InstEmitSimdShift.cs
diff options
context:
space:
mode:
author: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2019-03-13 09:23:52 +0100
committer: jduncanator <1518948+jduncanator@users.noreply.github.com> 2019-03-13 19:23:52 +1100
commit: 1bef70c068f8aeb6a3a518b8ca635de19122da14 (patch)
tree: 84d3ead95523f4803de1e6288f38ad45d6039005 /ChocolArm64/Instructions/InstEmitSimdShift.cs
parent: a0aecd1ff85437890bb6a86fcc71fc90e80a4d24 (diff)
Add Rshrn_V & Shrn_V Sse opt. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614)
* Update CountLeadingZeros().
* Remove obsolete Tests.
* Follow-up.
* Follow-up.
* Follow-up.
* Add Mla_V, Mls_V & Mul_V Tests.
* Update PackageReferences.
* Remove EmitLd/Stvectmp2().
* Remove Dup. Nits.
* Remove EmitLd/Stvectmp2() & Dup; nits.
* Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn().
* Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits.
* Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt. Remove Tmp stuff; remove Dup. Nits.
* Add Mla/Mls/Mul_V Sse opt. Add "Part" opt. Remove EmitLd/Stvectmp2(), remove Dup. Nits.
* Nits.
* Nits.
* Nit.
* Add "Part" opt. Nit.
* Nit.
* Nit.
* Add Cmhi_V & Cmhs_V Sse opt.
Diffstat (limited to 'ChocolArm64/Instructions/InstEmitSimdShift.cs')
-rw-r--r-- ChocolArm64/Instructions/InstEmitSimdShift.cs 187
1 files changed, 134 insertions, 53 deletions
diff --git a/ChocolArm64/Instructions/InstEmitSimdShift.cs b/ChocolArm64/Instructions/InstEmitSimdShift.cs
index c0b20d7e..6865948a 100644
--- a/ChocolArm64/Instructions/InstEmitSimdShift.cs
+++ b/ChocolArm64/Instructions/InstEmitSimdShift.cs
@@ -5,6 +5,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
+using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instructions.InstEmitSimdHelper;
@@ -13,9 +14,65 @@ namespace ChocolArm64.Instructions
{
static partial class InstEmit
{
+#region "Masks"
+ private static readonly long[] _masks_RshrnShrn = new long[] // 64-bit byte-index masks fed to Ssse3.Shuffle by Rshrn_V / Shrn_V, indexed by op.Size
+ {
+ 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // [0]: packs byte values 0, 2, 4, ..., 14 (lowest byte first): one of every two bytes
+ 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // [1]: packs byte values 0-1, 4-5, 8-9, 12-13: two of every four bytes
+ 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 // [2]: packs byte values 0-3, 8-11: four of every eight bytes
+ };
+#endregion
+
public static void Rshrn_V(ILEmitterCtx context)
{
- EmitVectorShrImmNarrowOpZx(context, round: true);
+ if (Optimizations.UseSsse3) // emits an SSE/SSE2/SSSE3 intrinsic sequence for the rounding shift-right-narrow; otherwise the generic helper in the else branch is used
+ {
+ OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
+
+ Type[] typesAdd = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], VectorUIntTypesPerSizeLog2[op.Size + 1] }; // Sse2.Add overload: unsigned vectors one size step above op.Size
+ Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) }; // Sse2.ShiftRightLogical overload: same vector type plus a byte count
+ Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) }; // Ssse3.Shuffle overload operating on sbyte lanes
+ Type[] typesSav = new Type[] { UIntTypesPerSizeLog2[op.Size + 1] }; // Sse2.SetAllVector128 overload for the scalar element type
+ Type[] typesSve = new Type[] { typeof(long), typeof(long) }; // Sse2.SetVector128 overload taking two longs
+
+ string nameMov = op.RegisterSize == RegisterSize.Simd128
+ ? nameof(Sse.MoveLowToHigh)
+ : nameof(Sse.MoveHighToLow); // final merge: MoveLowToHigh for Simd128, MoveHighToLow for any other register size
+
+ int shift = GetImmShr(op); // right-shift amount obtained from the opcode
+
+ long roundConst = 1L << (shift - 1); // rounding constant 2^(shift - 1), added before the shift
+
+ context.EmitLdvec(op.Rd); // first operand: current value of Rd
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); // second operand: result of VectorSingleZero (presumably an all-zero vector - confirm)
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); // MoveLowToHigh(Rd, zero): presumably keeps Rd's low half and clears the high half - confirm against the Sse docs
+
+ context.EmitLdvec(op.Rn); // source vector
+
+ context.EmitLdc_I8(roundConst);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), typesSav)); // vector built from roundConst via SetAllVector128
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd)); // Rn + roundConst
+
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value = (Rn + roundConst) >> shift (logical)
+
+ context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask: entry of _masks_RshrnShrn selected by op.Size
+ context.Emit(OpCodes.Dup); // mask: duplicated so both long arguments of SetVector128 are the same value
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); // shuffle control vector built from the two mask copies
+
+ context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); // Shuffle(value, mask)
+
+ context.EmitCall(typeof(Sse).GetMethod(nameMov)); // combine the Rd-based vector with the shuffled value using the move chosen above
+
+ context.EmitStvec(op.Rd); // write the combined vector back to Rd
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: true); // generic path, same call the method used before this change
+ }
}
public static void Shl_S(ILEmitterCtx context)
@@ -80,12 +137,13 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
@@ -102,7 +160,45 @@ namespace ChocolArm64.Instructions
public static void Shrn_V(ILEmitterCtx context)
{
- EmitVectorShrImmNarrowOpZx(context, round: false);
+ if (Optimizations.UseSsse3) // emits an SSE/SSE2/SSSE3 intrinsic sequence for the (non-rounding) shift-right-narrow; otherwise the generic helper in the else branch is used
+ {
+ OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
+
+ Type[] typesSrl = new Type[] { VectorUIntTypesPerSizeLog2[op.Size + 1], typeof(byte) }; // Sse2.ShiftRightLogical overload: unsigned vector one size step above op.Size plus a byte count
+ Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) }; // Ssse3.Shuffle overload operating on sbyte lanes
+ Type[] typesSve = new Type[] { typeof(long), typeof(long) }; // Sse2.SetVector128 overload taking two longs
+
+ string nameMov = op.RegisterSize == RegisterSize.Simd128
+ ? nameof(Sse.MoveLowToHigh)
+ : nameof(Sse.MoveHighToLow); // final merge: MoveLowToHigh for Simd128, MoveHighToLow for any other register size
+
+ int shift = GetImmShr(op); // right-shift amount obtained from the opcode
+
+ context.EmitLdvec(op.Rd); // first operand: current value of Rd
+ VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); // second operand: result of VectorSingleZero (presumably an all-zero vector - confirm)
+
+ context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); // MoveLowToHigh(Rd, zero): presumably keeps Rd's low half and clears the high half - confirm against the Sse docs
+
+ context.EmitLdvec(op.Rn); // source vector
+
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl)); // value = Rn >> shift (logical, no rounding add)
+
+ context.EmitLdc_I8(_masks_RshrnShrn[op.Size]); // mask: entry of _masks_RshrnShrn selected by op.Size
+ context.Emit(OpCodes.Dup); // mask: duplicated so both long arguments of SetVector128 are the same value
+
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); // shuffle control vector built from the two mask copies
+
+ context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); // Shuffle(value, mask)
+
+ context.EmitCall(typeof(Sse).GetMethod(nameMov)); // combine the Rd-based vector with the shuffled value using the move chosen above
+
+ context.EmitStvec(op.Rd); // write the combined vector back to Rd
+ }
+ else
+ {
+ EmitVectorShrImmNarrowOpZx(context, round: false); // generic path, same call the method used before this change
+ }
}
public static void Sli_V(ILEmitterCtx context)
@@ -271,8 +367,7 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- if (Optimizations.UseSse2 && op.Size > 0
- && op.Size < 3)
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
@@ -282,16 +377,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
context.EmitLdc_I4(eSize - shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
context.EmitLdc_I4(eSize - 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
- context.EmitLdvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));
@@ -320,8 +412,7 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- if (Optimizations.UseSse2 && op.Size > 0
- && op.Size < 3)
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
Type[] typesShs = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
@@ -332,16 +423,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
context.EmitLdc_I4(eSize - shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
context.EmitLdc_I4(eSize - 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
- context.EmitLdvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesShs));
@@ -403,17 +491,21 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
- context.EmitLdc_I4(shift);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+ if (shift != 0)
+ {
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+ }
context.EmitStvec(op.Rd);
}
@@ -432,8 +524,7 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- if (Optimizations.UseSse2 && op.Size > 0
- && op.Size < 3)
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
@@ -464,8 +555,7 @@ namespace ChocolArm64.Instructions
{
OpCodeSimdShImm64 op = (OpCodeSimdShImm64)context.CurrOp;
- if (Optimizations.UseSse2 && op.Size > 0
- && op.Size < 3)
+ if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3)
{
Type[] typesSra = new Type[] { VectorIntTypesPerSizeLog2[op.Size], typeof(byte) };
Type[] typesAdd = new Type[] { VectorIntTypesPerSizeLog2[op.Size], VectorIntTypesPerSizeLog2[op.Size] };
@@ -474,8 +564,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
-
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), typesSra));
+
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
context.EmitStvec(op.Rd);
@@ -612,16 +702,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
context.EmitLdc_I4(eSize - shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
context.EmitLdc_I4(eSize - 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
- context.EmitLdvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
@@ -661,16 +748,13 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rd);
context.EmitLdvec(op.Rn);
- context.Emit(OpCodes.Dup);
- context.EmitStvectmp();
-
context.EmitLdc_I4(eSize - shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesShs));
context.EmitLdc_I4(eSize - 1);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
- context.EmitLdvectmp();
+ context.EmitLdvec(op.Rn);
context.EmitLdc_I4(shift);
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesShs));
@@ -732,17 +816,21 @@ namespace ChocolArm64.Instructions
nameof(Sse41.ConvertToVector128Int32),
nameof(Sse41.ConvertToVector128Int64) };
- int numBytes = op.RegisterSize == RegisterSize.Simd128 ? 8 : 0;
-
context.EmitLdvec(op.Rn);
- context.EmitLdc_I4(numBytes);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ if (op.RegisterSize == RegisterSize.Simd128)
+ {
+ context.Emit(OpCodes.Ldc_I4_8);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), typesSll));
+ }
context.EmitCall(typeof(Sse41).GetMethod(namesCvt[op.Size], typesCvt));
- context.EmitLdc_I4(shift);
- context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+ if (shift != 0)
+ {
+ context.EmitLdc_I4(shift);
+ context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), typesSll));
+ }
context.EmitStvec(op.Rd);
}
@@ -801,8 +889,8 @@ namespace ChocolArm64.Instructions
context.EmitLdvec(op.Rn);
context.EmitLdc_I4(GetImmShr(op));
-
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), typesSrl));
+
context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), typesAdd));
context.EmitStvec(op.Rd);
@@ -899,12 +987,9 @@ namespace ChocolArm64.Instructions
context.Emit(OpCodes.Add);
}
- EmitVectorInsertTmp(context, index, op.Size);
+ EmitVectorInsert(context, op.Rd, index, op.Size);
}
- context.EmitLdvectmp();
- context.EmitStvec(op.Rd);
-
if ((op.RegisterSize == RegisterSize.Simd64) || scalar)
{
EmitVectorZeroUpper(context, op.Rd);
@@ -1044,11 +1129,7 @@ namespace ChocolArm64.Instructions
}
// dst64 = (Int(src64, signed) + roundConst) >> shift;
- private static void EmitShrImm64(
- ILEmitterCtx context,
- bool signed,
- long roundConst,
- int shift)
+ private static void EmitShrImm64(ILEmitterCtx context, bool signed, long roundConst, int shift)
{
context.EmitLdc_I8(roundConst);
context.EmitLdc_I4(shift);