about | summary | refs | log | tree | commit | diff
path: root/ChocolArm64/Instruction/AInstEmitSimdShift.cs
diff options
context:
space:
mode:
author: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2018-10-26 00:10:41 +0200
committer: gdkchan <gab.dark.100@gmail.com> 2018-10-25 19:10:41 -0300
commit: 00d4f44bbb3c7cf768cdd2bf7676b8ea7e6034e2 (patch)
tree: b92c5d26c34f9e918a48b055baf71c638b4ce7a7 /ChocolArm64/Instruction/AInstEmitSimdShift.cs
parent: f0a49a1c94c74886f4cfb35c35d7deec82f6bd8f (diff)
Add Sse Opt. for S/Uaddl_V, S/Uhadd_V, S/Uhsub_V, S/Umlal_V, S/Umlsl_V, S/Urhadd_V, S/Usubl_V Inst.; and for S/Urshr_V, S/Ursra_V Inst.. (#480)
* Update AILEmitterCtx.cs
* Update AInstEmitSimdArithmetic.cs
* Update AInstEmitSimdShift.cs
Diffstat (limited to 'ChocolArm64/Instruction/AInstEmitSimdShift.cs')
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdShift.cs 183
1 files changed, 170 insertions, 13 deletions
diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs
index 8918c0e1..4f828cf8 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs
@@ -1,3 +1,5 @@
+// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h
+
using ChocolArm64.Decoder;
using ChocolArm64.State;
using ChocolArm64.Translation;
@@ -34,13 +36,12 @@ namespace ChocolArm64.Instruction
if (AOptimizations.UseSse2 && Op.Size > 0)
{
- Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
+ Type[] TypesSll = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
Context.EmitLdc_I4(GetImmShl(Op));
-
- Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), Types));
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesSll));
EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
@@ -156,7 +157,46 @@ namespace ChocolArm64.Instruction
public static void Srshr_V(AILEmitterCtx Context)
{
- EmitVectorShrImmOpSx(Context, ShrImmFlags.Round);
+ AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+ // Vector signed rounding shift right by immediate: emit an SSE2 sequence when enabled, otherwise use the generic emitter.
+ if (AOptimizations.UseSse2 && Op.Size > 0
+ && Op.Size < 3) // NOTE(review): presumably restricted to sizes 1-2 because Sse2.ShiftRightArithmetic has no overload for other element widths - confirm.
+ {
+ Type[] TypesShs = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; // (vector, shift count) signature used to resolve the shift overloads.
+ Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] }; // (vector, vector) signature used to resolve Sse2.Add.
+ // Shift is the immediate right-shift amount; ESize is 8 << Size, presumably the element width in bits (Size looks like log2 of the byte width).
+ int Shift = GetImmShr(Op);
+ int ESize = 8 << Op.Size;
+ // Load Rn, then duplicate it: one copy is stashed via EmitStvectmp (presumably a context vector temp), the other feeds the rounding term.
+ EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
+
+ Context.Emit(OpCodes.Dup);
+ Context.EmitStvectmp();
+ // Rounding term: shift left by (ESize - Shift) so bit (Shift - 1) becomes the top bit, then logical shift right by (ESize - 1) to leave 0 or 1.
+ Context.EmitLdc_I4(ESize - Shift);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesShs));
+
+ Context.EmitLdc_I4(ESize - 1);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs));
+ // Reload the stashed copy of Rn and apply the arithmetic (sign-preserving) right shift by the immediate.
+ Context.EmitLdvectmp();
+
+ Context.EmitLdc_I4(Shift);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesShs));
+ // Result = shifted value + rounding term.
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
+
+ EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);
+ // The sequence above operates on the whole vector; for the SIMD64 register form EmitVectorZeroUpper is applied to Rd afterwards.
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(Context, ShrImmFlags.Round); // Generic path (the only implementation before this change).
+ }
}
public static void Srsra_S(AILEmitterCtx Context)
@@ -166,7 +206,48 @@ namespace ChocolArm64.Instruction
public static void Srsra_V(AILEmitterCtx Context)
{
- EmitVectorShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+ // Vector signed rounding shift right by immediate and accumulate into Rd: SSE2 sequence when enabled, generic emitter otherwise.
+ if (AOptimizations.UseSse2 && Op.Size > 0
+ && Op.Size < 3) // NOTE(review): presumably restricted to sizes 1-2 because Sse2.ShiftRightArithmetic has no overload for other element widths - confirm.
+ {
+ Type[] TypesShs = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; // (vector, shift count) signature used to resolve the shift overloads.
+ Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] }; // (vector, vector) signature used to resolve Sse2.Add.
+ // Shift is the immediate right-shift amount; ESize is 8 << Size, presumably the element width in bits (Size looks like log2 of the byte width).
+ int Shift = GetImmShr(Op);
+ int ESize = 8 << Op.Size;
+ // Rd is loaded first so the accumulator sits at the bottom of the evaluation stack until the final Add; Rn is loaded on top of it.
+ EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size);
+ EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
+ // Duplicate Rn and stash one copy via EmitStvectmp (presumably a context vector temp) for the main shift below.
+ Context.Emit(OpCodes.Dup);
+ Context.EmitStvectmp();
+ // Rounding term: shift left by (ESize - Shift) so bit (Shift - 1) becomes the top bit, then logical shift right by (ESize - 1) to leave 0 or 1.
+ Context.EmitLdc_I4(ESize - Shift);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesShs));
+
+ Context.EmitLdc_I4(ESize - 1);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs));
+ // Reload the stashed copy of Rn and apply the arithmetic (sign-preserving) right shift by the immediate.
+ Context.EmitLdvectmp();
+
+ Context.EmitLdc_I4(Shift);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesShs));
+ // First Add: shifted value + rounding term. Second Add: that sum + the Rd value loaded at the start.
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
+
+ EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);
+ // The sequence above operates on the whole vector; for the SIMD64 register form EmitVectorZeroUpper is applied to Rd afterwards.
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); // Generic path (the only implementation before this change).
+ }
}
public static void Sshl_V(AILEmitterCtx Context)
@@ -193,13 +274,12 @@ namespace ChocolArm64.Instruction
if (AOptimizations.UseSse2 && Op.Size > 0
&& Op.Size < 3)
{
- Type[] Types = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
+ Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
Context.EmitLdc_I4(GetImmShr(Op));
-
- Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), Types));
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra));
EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);
@@ -277,7 +357,45 @@ namespace ChocolArm64.Instruction
public static void Urshr_V(AILEmitterCtx Context)
{
- EmitVectorShrImmOpZx(Context, ShrImmFlags.Round);
+ AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+ // Vector unsigned rounding shift right by immediate: emit an SSE2 sequence when enabled, otherwise use the generic emitter.
+ if (AOptimizations.UseSse2 && Op.Size > 0) // NOTE(review): Size 0 takes the generic path, presumably because Sse2 has no shift overload for that element width - confirm.
+ {
+ Type[] TypesShs = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; // (vector, shift count) signature used to resolve the shift overloads.
+ Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; // (vector, vector) signature used to resolve Sse2.Add.
+ // Shift is the immediate right-shift amount; ESize is 8 << Size, presumably the element width in bits (Size looks like log2 of the byte width).
+ int Shift = GetImmShr(Op);
+ int ESize = 8 << Op.Size;
+ // Load Rn, then duplicate it: one copy is stashed via EmitStvectmp (presumably a context vector temp), the other feeds the rounding term.
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
+
+ Context.Emit(OpCodes.Dup);
+ Context.EmitStvectmp();
+ // Rounding term: shift left by (ESize - Shift) so bit (Shift - 1) becomes the top bit, then logical shift right by (ESize - 1) to leave 0 or 1.
+ Context.EmitLdc_I4(ESize - Shift);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesShs));
+
+ Context.EmitLdc_I4(ESize - 1);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs));
+ // Reload the stashed copy of Rn and apply the logical (zero-filling) right shift by the immediate.
+ Context.EmitLdvectmp();
+
+ Context.EmitLdc_I4(Shift);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs));
+ // Result = shifted value + rounding term.
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+ // The sequence above operates on the whole vector; for the SIMD64 register form EmitVectorZeroUpper is applied to Rd afterwards.
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(Context, ShrImmFlags.Round); // Generic path (the only implementation before this change).
+ }
}
public static void Ursra_S(AILEmitterCtx Context)
@@ -287,7 +405,47 @@ namespace ChocolArm64.Instruction
public static void Ursra_V(AILEmitterCtx Context)
{
- EmitVectorShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate);
+ AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+ // Vector unsigned rounding shift right by immediate and accumulate into Rd: SSE2 sequence when enabled, generic emitter otherwise.
+ if (AOptimizations.UseSse2 && Op.Size > 0) // NOTE(review): Size 0 takes the generic path, presumably because Sse2 has no shift overload for that element width - confirm.
+ {
+ Type[] TypesShs = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; // (vector, shift count) signature used to resolve the shift overloads.
+ Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; // (vector, vector) signature used to resolve Sse2.Add.
+ // Shift is the immediate right-shift amount; ESize is 8 << Size, presumably the element width in bits (Size looks like log2 of the byte width).
+ int Shift = GetImmShr(Op);
+ int ESize = 8 << Op.Size;
+ // Rd is loaded first so the accumulator sits at the bottom of the evaluation stack until the final Add; Rn is loaded on top of it.
+ EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
+ // Duplicate Rn and stash one copy via EmitStvectmp (presumably a context vector temp) for the main shift below.
+ Context.Emit(OpCodes.Dup);
+ Context.EmitStvectmp();
+ // Rounding term: shift left by (ESize - Shift) so bit (Shift - 1) becomes the top bit, then logical shift right by (ESize - 1) to leave 0 or 1.
+ Context.EmitLdc_I4(ESize - Shift);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), TypesShs));
+
+ Context.EmitLdc_I4(ESize - 1);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs));
+ // Reload the stashed copy of Rn and apply the logical (zero-filling) right shift by the immediate.
+ Context.EmitLdvectmp();
+
+ Context.EmitLdc_I4(Shift);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesShs));
+ // First Add: shifted value + rounding term. Second Add: that sum + the Rd value loaded at the start.
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+ // The sequence above operates on the whole vector; for the SIMD64 register form EmitVectorZeroUpper is applied to Rd afterwards.
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(Context, ShrImmFlags.Round | ShrImmFlags.Accumulate); // Generic path (the only implementation before this change).
+ }
}
public static void Ushl_V(AILEmitterCtx Context)
@@ -313,13 +471,12 @@ namespace ChocolArm64.Instruction
if (AOptimizations.UseSse2 && Op.Size > 0)
{
- Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
+ Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
Context.EmitLdc_I4(GetImmShr(Op));
-
- Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), Types));
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl));
EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);