diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2019-01-29 14:54:39 +0100 |
|---|---|---|
| committer | gdkchan <gab.dark.100@gmail.com> | 2019-01-29 10:54:39 -0300 |
| commit | 8f7fcede7fa98c605925dc7b9316940960543bf1 (patch) | |
| tree | de8fa085c85ed8419abd25e04a707e007f180fe4 /ChocolArm64/Instructions/InstEmitSimdMove.cs | |
| parent | 36b9ab0e48b6893c057a954e1ef3181b452add1c (diff) | |
Add Smlal_Ve, Smlsl_Ve, Smull_Ve, Umlal_Ve, Umlsl_Ve and Umull_Ve instructions; add tests. Add SSE optimizations for the Trn1/2_V and Uzp1/2_V instructions. Nits. (#566)
* Update OpCodeTable.cs
* Update InstEmitSimdArithmetic.cs
* Update InstEmitSimdHelper.cs
* Update CpuTestSimdRegElem.cs
* Update InstEmitSimdMove.cs
* Update InstEmitSimdCvt.cs
* Update SoftFallback.cs
* Update InstEmitSimdHelper.cs
* Update SoftFloat.cs
* Update CryptoHelper.cs
* Update InstEmitSimdArithmetic.cs
* Update InstEmitSimdCmp.cs
* Address PR feedback.
* Address PR feedback.
Diffstat (limited to 'ChocolArm64/Instructions/InstEmitSimdMove.cs')
| -rw-r--r-- | ChocolArm64/Instructions/InstEmitSimdMove.cs | 242 |
1 file changed, 180 insertions, 62 deletions
diff --git a/ChocolArm64/Instructions/InstEmitSimdMove.cs b/ChocolArm64/Instructions/InstEmitSimdMove.cs index d40ccff9..2844dfdf 100644 --- a/ChocolArm64/Instructions/InstEmitSimdMove.cs +++ b/ChocolArm64/Instructions/InstEmitSimdMove.cs @@ -12,6 +12,34 @@ namespace ChocolArm64.Instructions { static partial class InstEmit { +#region "Masks" + private static readonly long[] _masksE0_TrnUzpXtn = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_TrnUzp = new long[] + { + 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, + 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 + }; + + private static readonly long[] _masksE0_Uzp = new long[] + { + 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_Uzp = new long[] + { + 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0 + }; +#endregion + public static void Dup_Gp(ILEmitterCtx context) { OpCodeSimdIns64 op = (OpCodeSimdIns64)context.CurrOp; @@ -379,15 +407,6 @@ namespace ChocolArm64.Instructions if (Optimizations.UseSsse3) { - long[] masks = new long[] - { - 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, - 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 
0, - 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 - }; - - Type[] typesMov = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; - Type[] typesSfl = new Type[] { typeof(Vector128<sbyte>), typeof(Vector128<sbyte>) }; Type[] typesSve = new Type[] { typeof(long), typeof(long) }; string nameMov = op.RegisterSize == RegisterSize.Simd128 @@ -397,18 +416,18 @@ namespace ChocolArm64.Instructions context.EmitLdvec(op.Rd); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); - context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh), typesMov)); + context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MoveLowToHigh))); - EmitLdvecWithSignedCast(context, op.Rn, 0); + EmitLdvecWithSignedCast(context, op.Rn, 0); // value - context.EmitLdc_I8(masks[op.Size]); - context.Emit(OpCodes.Dup); + context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // mask + context.Emit(OpCodes.Dup); // mask context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); - context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), typesSfl)); + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); - context.EmitCall(typeof(Sse).GetMethod(nameMov, typesMov)); + context.EmitCall(typeof(Sse).GetMethod(nameMov)); context.EmitStvec(op.Rd); } @@ -465,22 +484,61 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - int words = op.GetBitsCount() >> 4; - int pairs = words >> op.Size; - - for (int index = 0; index < pairs; index++) + if (Optimizations.UseSsse3) { - int idx = index << 1; + Type[] typesSve = new Type[] { typeof(long), typeof(long) }; + + string nameUpk = part == 0 + ? 
nameof(Sse2.UnpackLow) + : nameof(Sse2.UnpackHigh); + + EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value + + if (op.Size < 3) + { + context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1 + context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); + } + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value + + if (op.Size < 3) + { + context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1 + context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); + } - EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); - EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(op.Size))); - EmitVectorInsertTmp(context, idx + 1, op.Size); - EmitVectorInsertTmp(context, idx, op.Size); + EmitStvecWithSignedCast(context, op.Rd, op.Size); } + else + { + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + EmitVectorInsertTmp(context, idx + 1, op.Size); + EmitVectorInsertTmp(context, idx, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + } if (op.RegisterSize == RegisterSize.Simd64) { @@ -492,26 +550,91 @@ namespace ChocolArm64.Instructions { OpCodeSimdReg64 op = (OpCodeSimdReg64)context.CurrOp; - int words = op.GetBitsCount() >> 4; - int pairs = words >> op.Size; - - for (int index = 0; index < pairs; index++) + if (Optimizations.UseSsse3) { - int idx = index << 1; + 
Type[] typesSve = new Type[] { typeof(long), typeof(long) }; + + string nameUpk = part == 0 + ? nameof(Sse2.UnpackLow) + : nameof(Sse2.UnpackHigh); - EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); - EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + if (op.RegisterSize == RegisterSize.Simd128) + { + EmitLdvecWithSignedCast(context, op.Rn, op.Size); // value - EmitVectorInsertTmp(context, pairs + index, op.Size); - EmitVectorInsertTmp(context, index, op.Size); - } + if (op.Size < 3) + { + context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1 + context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0 - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); - if (op.RegisterSize == RegisterSize.Simd64) + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); + } + + EmitLdvecWithSignedCast(context, op.Rm, op.Size); // value + + if (op.Size < 3) + { + context.EmitLdc_I8(_masksE1_TrnUzp [op.Size]); // maskE1 + context.EmitLdc_I8(_masksE0_TrnUzpXtn[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); + } + + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + } + else + { + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); // value + + if (op.Size < 2) + { + context.EmitLdc_I8(_masksE1_Uzp[op.Size]); // maskE1 + context.EmitLdc_I8(_masksE0_Uzp[op.Size]); // maskE0 + + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetVector128), typesSve)); + + context.EmitCall(typeof(Ssse3).GetMethod(nameof(Ssse3.Shuffle), GetTypesSflUpk(0))); + } + + VectorHelper.EmitCall(context, 
nameof(VectorHelper.VectorInt64Zero)); + + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); + + EmitStvecWithSignedCast(context, op.Rd, op.Size); + } + } + else { - EmitVectorZeroUpper(context, op.Rd); + int words = op.GetBitsCount() >> 4; + int pairs = words >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + EmitVectorInsertTmp(context, pairs + index, op.Size); + EmitVectorInsertTmp(context, index, op.Size); + } + + context.EmitLdvectmp(); + context.EmitStvec(op.Rd); + + if (op.RegisterSize == RegisterSize.Simd64) + { + EmitVectorZeroUpper(context, op.Rd); + } } } @@ -521,36 +644,26 @@ namespace ChocolArm64.Instructions if (Optimizations.UseSse2) { - EmitLdvecWithUnsignedCast(context, op.Rn, op.Size); - EmitLdvecWithUnsignedCast(context, op.Rm, op.Size); - - Type[] types = new Type[] - { - VectorUIntTypesPerSizeLog2[op.Size], - VectorUIntTypesPerSizeLog2[op.Size] - }; - - string name = part == 0 || (part != 0 && op.RegisterSize == RegisterSize.Simd64) + string nameUpk = part == 0 ? 
nameof(Sse2.UnpackLow) : nameof(Sse2.UnpackHigh); - context.EmitCall(typeof(Sse2).GetMethod(name, types)); + EmitLdvecWithSignedCast(context, op.Rn, op.Size); + EmitLdvecWithSignedCast(context, op.Rm, op.Size); - if (op.RegisterSize == RegisterSize.Simd64 && part != 0) + if (op.RegisterSize == RegisterSize.Simd128) { - context.EmitLdc_I4(8); - - Type[] shTypes = new Type[] { VectorUIntTypesPerSizeLog2[op.Size], typeof(byte) }; - - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), shTypes)); + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(op.Size))); } - - EmitStvecWithUnsignedCast(context, op.Rd, op.Size); - - if (op.RegisterSize == RegisterSize.Simd64 && part == 0) + else { - EmitVectorZeroUpper(context, op.Rd); + context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), GetTypesSflUpk(op.Size))); + VectorHelper.EmitCall(context, nameof(VectorHelper.VectorInt64Zero)); + + context.EmitCall(typeof(Sse2).GetMethod(nameUpk, GetTypesSflUpk(3))); } + + EmitStvecWithSignedCast(context, op.Rd, op.Size); } else { @@ -579,5 +692,10 @@ namespace ChocolArm64.Instructions } } } + + private static Type[] GetTypesSflUpk(int size) + { + return new Type[] { VectorIntTypesPerSizeLog2[size], VectorIntTypesPerSizeLog2[size] }; + } } } |
