diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2019-03-13 09:23:52 +0100 |
|---|---|---|
| committer | jduncanator <1518948+jduncanator@users.noreply.github.com> | 2019-03-13 19:23:52 +1100 |
| commit | 1bef70c068f8aeb6a3a518b8ca635de19122da14 (patch) | |
| tree | 84d3ead95523f4803de1e6288f38ad45d6039005 /ChocolArm64/Instructions/InstEmitSimdHelper.cs | |
| parent | a0aecd1ff85437890bb6a86fcc71fc90e80a4d24 (diff) | |
Add Rshrn_V & Shrn_V Sse opt.. Add Mla_V, Mls_V & Mul_V Sse opt.; add Tests. (#614)
* Update CountLeadingZeros().
* Remove obsolete Tests.
* Follow-up.
* Follow-up.
* Follow-up.
* Add Mla_V, Mls_V & Mul_V Tests.
* Update PackageReferences.
* Remove EmitLd/Stvectmp2().
* Remove Dup. Nits.
* Remove EmitLd/Stvectmp2() & Dup; nits.
* Remove Tmp stuff & Dup; rework Fcvtz() as Fcvtn().
* Remove Tmp stuff, EmitLd/Stvectmp2() & Dup. Nits.
* Add (R)shrn_V Sse opt.; add "Part" & "Shift" opt..
Remove Tmp stuff; remove Dup.
Nits.
* Add Mla/Mls/Mul_V Sse opt.. Add "Part" opt..
Remove EmitLd/Stvectmp2(), remove Dup.
Nits.
* Nits.
* Nits.
* Nit.
* Add "Part" opt.. Nit.
* Nit.
* Nit.
* Add Cmhi_V & Cmhs_V Sse opt..
Diffstat (limited to 'ChocolArm64/Instructions/InstEmitSimdHelper.cs')
| -rw-r--r-- | ChocolArm64/Instructions/InstEmitSimdHelper.cs | 83 |
1 files changed, 35 insertions, 48 deletions
diff --git a/ChocolArm64/Instructions/InstEmitSimdHelper.cs b/ChocolArm64/Instructions/InstEmitSimdHelper.cs index b7dd09b4..10b86a3e 100644 --- a/ChocolArm64/Instructions/InstEmitSimdHelper.cs +++ b/ChocolArm64/Instructions/InstEmitSimdHelper.cs @@ -592,12 +592,9 @@ namespace ChocolArm64.Instructions emit(); - EmitVectorInsertTmp(context, index, op.Size); + EmitVectorInsert(context, op.Rd, index, op.Size); } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); @@ -898,20 +895,13 @@ namespace ChocolArm64.Instructions Type[] types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) }; context.EmitLdvec(op.Rn); - - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); - context.EmitLdc_I4(2 << 6 | 0 << 4 | 2 << 2 | 0 << 0); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); - context.EmitLdvectmp(); - context.EmitLdvectmp2(); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitLdc_I4(3 << 6 | 1 << 4 | 3 << 2 | 1 << 0); context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.Shuffle), typesSfl)); @@ -926,19 +916,12 @@ namespace ChocolArm64.Instructions Type[] types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) }; context.EmitLdvec(op.Rn); - - context.Emit(OpCodes.Dup); - context.EmitStvectmp(); - context.EmitLdvec(op.Rm); - context.Emit(OpCodes.Dup); - context.EmitStvectmp2(); - context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackLow), types)); - context.EmitLdvectmp(); - context.EmitLdvectmp2(); + context.EmitLdvec(op.Rn); + context.EmitLdvec(op.Rm); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.UnpackHigh), types)); @@ -985,11 +968,6 @@ namespace ChocolArm64.Instructions int bytes = op.GetBitsCount() >> 3; int elems = !scalar ? bytes >> op.Size : 1; - if (scalar) - { - EmitVectorZeroLowerTmp(context); - } - for (int index = 0; index < elems; index++) { EmitVectorExtractSx(context, op.Rn, index, op.Size); @@ -1005,13 +983,15 @@ namespace ChocolArm64.Instructions EmitUnarySignedSatQAbsOrNeg(context); } - EmitVectorInsertTmp(context, index, op.Size); - } + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); + EmitVectorInsert(context, op.Rd, index, op.Size); + } - if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } @@ -1052,11 +1032,6 @@ namespace ChocolArm64.Instructions int bytes = op.GetBitsCount() >> 3; int elems = !scalar ? bytes >> op.Size : 1; - if (scalar) - { - EmitVectorZeroLowerTmp(context); - } - if (add || sub) { for (int index = 0; index < elems; index++) @@ -1082,7 +1057,12 @@ namespace ChocolArm64.Instructions } } - EmitVectorInsertTmp(context, index, op.Size); + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); } } else if (accumulate) @@ -1103,7 +1083,12 @@ namespace ChocolArm64.Instructions EmitBinarySatQAccumulate(context, signed); } - EmitVectorInsertTmp(context, index, op.Size); + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); } } else @@ -1117,14 +1102,16 @@ namespace ChocolArm64.Instructions EmitSatQ(context, op.Size, true, signed); - EmitVectorInsertTmp(context, index, op.Size); + if (scalar) + { + EmitVectorZeroAll(context, op.Rd); + } + + EmitVectorInsert(context, op.Rd, index, op.Size); } } - context.EmitLdvectmp(); - context.EmitStvec(op.Rd); - - if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + if (op.RegisterSize == RegisterSize.Simd64) { EmitVectorZeroUpper(context, op.Rd); } @@ -1190,7 +1177,7 @@ namespace ChocolArm64.Instructions // TSrc (16bit, 32bit, 64bit; signed, unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned). public static void EmitSatQ(ILEmitterCtx context, int sizeDst, bool signedSrc, bool signedDst) { - if ((uint)sizeDst > 2) + if ((uint)sizeDst > 2u) { throw new ArgumentOutOfRangeException(nameof(sizeDst)); } @@ -1381,15 +1368,15 @@ namespace ChocolArm64.Instructions if (Optimizations.UseSse) { //TODO: Use Sse2.MoveScalar once it is fixed, - //as of the time of writing it just crashes the JIT (SDK 2.1.503). + //as of the time of writing it just crashes the JIT (SDK 2.1.504). /*Type[] typesMov = new Type[] { typeof(Vector128<ulong>) }; - EmitLdvecWithUnsignedCast(context, reg, 3); + context.EmitLdvec(reg); context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), typesMov)); - EmitStvecWithUnsignedCast(context, reg, 3);*/ + context.EmitStvec(reg);*/ context.EmitLdvec(reg); VectorHelper.EmitCall(context, nameof(VectorHelper.VectorSingleZero)); |
