diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2019-10-25 01:37:42 +0200 |
|---|---|---|
| committer | gdkchan <gab.dark.100@gmail.com> | 2019-10-24 20:37:42 -0300 |
| commit | eff8379d2aa52574b6df25e469b87a142cc3863f (patch) | |
| tree | d39bd91dd4b89219c77b444e2ca68bf83502d805 /ARMeilleure | |
| parent | 86b42f176af69b58b493396dff0a5958e073b39b (diff) | |
Add Sli_S/V & Sri_S/V inst.s (fast & slow paths), with Tests. (#797)
* Add Sli & Sri.
* Add scalar variants.
Diffstat (limited to 'ARMeilleure')
| -rw-r--r-- | ARMeilleure/Decoders/OpCodeTable.cs | 4 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdShift.cs | 170 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstName.cs | 3 |
3 files changed, 144 insertions, 33 deletions
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index 55cd5cb6..2fa7702d 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -437,6 +437,7 @@ namespace ARMeilleure.Decoders SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, InstEmit.Shll_V, typeof(OpCodeSimd)); SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, InstEmit.Shrn_V, typeof(OpCodeSimdShImm)); SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, typeof(OpCodeSimdReg)); + SetA64("0111111101xxxxxx010101xxxxxxxxxx", InstName.Sli_S, InstEmit.Sli_S, typeof(OpCodeSimdShImm)); SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm)); SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, typeof(OpCodeSimdShImm)); SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, typeof(OpCodeSimdReg)); @@ -485,6 +486,9 @@ namespace ARMeilleure.Decoders SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, typeof(OpCodeSimd)); SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, typeof(OpCodeSimd)); SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, typeof(OpCodeSimdReg)); + SetA64("0111111101xxxxxx010001xxxxxxxxxx", InstName.Sri_S, InstEmit.Sri_S, typeof(OpCodeSimdShImm)); + SetA64("0x10111100>>>xxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, typeof(OpCodeSimdShImm)); + SetA64("0110111101xxxxxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, typeof(OpCodeSimdShImm)); SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, InstEmit.Srshl_V, typeof(OpCodeSimdReg)); SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, InstEmit.Srshr_S, typeof(OpCodeSimdShImm)); SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, typeof(OpCodeSimdShImm)); diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs index 1aae491d..17f43c81 100644 --- a/ARMeilleure/Instructions/InstEmitSimdShift.cs +++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -22,6 +22,11 @@ namespace ARMeilleure.Instructions 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 }; + + private static readonly long[] _masks_SliSri = new long[] // Replication masks. + { + 0x0101010101010101L, 0x0001000100010001L, 0x0000000100000001L, 0x0000000000000001L + }; #endregion public static void Rshrn_V(ArmEmitterContext context) @@ -66,7 +71,7 @@ namespace ARMeilleure.Instructions res = context.AddIntrinsic(movInst, dLow, res); - context.Copy(GetVec(op.Rd), res); + context.Copy(d, res); } else { @@ -106,7 +111,7 @@ namespace ARMeilleure.Instructions } else { - EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); } } @@ -149,8 +154,6 @@ namespace ARMeilleure.Instructions int shift = GetImmShr(op); - long roundConst = 1L << (shift - 1); - Operand d = GetVec(op.Rd); Operand n = GetVec(op.Rn); @@ -170,7 +173,7 @@ namespace ARMeilleure.Instructions res = context.AddIntrinsic(movInst, dLow, res); - context.Copy(GetVec(op.Rd), res); + context.Copy(d, res); } else { @@ -178,34 +181,14 @@ namespace ARMeilleure.Instructions } } - public static void Sli_V(ArmEmitterContext context) + public static void Sli_S(ArmEmitterContext context) { - OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; - - Operand res = context.VectorZero(); - - int elems = op.GetBytesCount() >> op.Size; - - int shift = GetImmShl(op); - - ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0; - - for (int index = 0; index < elems; index++) - { - Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); - - Operand neShifted = context.ShiftLeft(ne, Const(shift)); - - Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); - - Operand deMasked = context.BitwiseAnd(de, Const(mask)); - - Operand e = context.BitwiseOr(neShifted, deMasked); - - res = EmitVectorInsert(context, res, e, index, op.Size); - } + EmitSli(context, scalar: true); + } - context.Copy(GetVec(op.Rd), res); + public static void Sli_V(ArmEmitterContext context) + { + EmitSli(context, scalar: false); } public static void Sqrshl_V(ArmEmitterContext context) @@ -290,6 +273,16 @@ namespace ARMeilleure.Instructions EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); } + public static void Sri_S(ArmEmitterContext context) + { + EmitSri(context, scalar: true); + } + + public static void Sri_V(ArmEmitterContext context) + { + EmitSri(context, scalar: false); + } + public static void Srshl_V(ArmEmitterContext context) { OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; @@ -395,7 +388,7 @@ namespace ARMeilleure.Instructions res = context.VectorZeroUpper64(res); } - context.Copy(GetVec(op.Rd), res); + context.Copy(d, res); } else { @@ -690,7 +683,7 @@ namespace ARMeilleure.Instructions res = context.VectorZeroUpper64(res); } - context.Copy(GetVec(op.Rd), res); + context.Copy(d, res); } else { @@ -1053,5 +1046,116 @@ namespace ARMeilleure.Instructions context.Copy(GetVec(op.Rd), res); } + + private static void EmitSli(ArmEmitterContext context, bool scalar) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL; + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift)); + + Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); + + Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand neShifted = context.ShiftLeft(ne, Const(shift)); + + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSri(ArmEmitterContext context, bool scalar) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize)); + + if (Optimizations.UseSse2 && op.Size > 0) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift)); + + Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); + + Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL); + + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } } } diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index e03ab616..c81484a6 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -313,6 +313,7 @@ namespace ARMeilleure.Instructions Shll_V, Shrn_V, Shsub_V, + Sli_S, Sli_V, Smax_V, Smaxp_V, @@ -354,6 +355,8 @@ namespace ARMeilleure.Instructions Sqxtun_S, Sqxtun_V, Srhadd_V, + Sri_S, + Sri_V, Srshl_V, Srshr_S, Srshr_V, |
