diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2018-10-14 04:35:16 +0200 |
|---|---|---|
| committer | gdkchan <gab.dark.100@gmail.com> | 2018-10-13 23:35:16 -0300 |
| commit | 894459fcd7797b1e38f2448797d83856d11b6e23 (patch) | |
| tree | 87a67e3b80cba4b05a29d243db63d130e1b362c2 /ChocolArm64/Instruction | |
| parent | ac1a379265d0c02a8bd4a146c205f21e2d00f3ab (diff) | |
Add Fmls_Se, Fmulx_Se/Ve, Smov_S Inst.; Opt. Clz/Clz_V, Cnt_V, Shl_V, S/Ushr_V, S/Usra_V Inst.; Add 11 Tests. Some fixes. (#449)
* Update AOpCodeTable.cs
* Update AInstEmitSimdMove.cs
* Update AInstEmitSimdArithmetic.cs
* Update AInstEmitSimdShift.cs
* Update ASoftFallback.cs
* Update ASoftFloat.cs
* Update AOpCodeSimdRegElemF.cs
* Update CpuTestSimdIns.cs
* Update CpuTestSimdRegElem.cs
* Create CpuTestSimdRegElemF.cs
* Update CpuTestSimd.cs
* Update CpuTestSimdReg.cs
* Superseded Fmul_Se Test. Nit.
* Address PR feedback.
* Address PR feedback.
* Update AInstEmitSimdArithmetic.cs
* Update ASoftFallback.cs
* Update AInstEmitAlu.cs
* Update AInstEmitSimdShift.cs
Diffstat (limited to 'ChocolArm64/Instruction')
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitAlu.cs | 14 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs | 81 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdMove.cs | 14 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdShift.cs | 131 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/ASoftFallback.cs | 19 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/ASoftFloat.cs | 16 |
6 files changed, 235 insertions, 40 deletions
diff --git a/ChocolArm64/Instruction/AInstEmitAlu.cs b/ChocolArm64/Instruction/AInstEmitAlu.cs index 490387e1..4551346b 100644 --- a/ChocolArm64/Instruction/AInstEmitAlu.cs +++ b/ChocolArm64/Instruction/AInstEmitAlu.cs @@ -4,6 +4,7 @@ using ChocolArm64.Translation; using System; using System.Reflection; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitAluHelper; @@ -117,9 +118,18 @@ namespace ChocolArm64.Instruction Context.EmitLdintzr(Op.Rn); - Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64); + if (Lzcnt.IsSupported) + { + Type TValue = Op.RegisterSize == ARegisterSize.Int32 ? typeof(uint) : typeof(ulong); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { TValue })); + } + else + { + Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + } Context.EmitStintzr(Op.Rd); } diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index d11a0b84..7ba08f5e 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -83,19 +83,31 @@ namespace ChocolArm64.Instruction public static void Cls_V(AILEmitterCtx Context) { - MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingSigns)); + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo)); - } + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; - public static void Clz_V(AILEmitterCtx Context) - { - MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingZeros)); + int ESize = 8 << Op.Size; - EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo)); + for (int Index = 0; Index < Elems; Index++) + { + EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); + + Context.EmitLdc_I4(ESize); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingSigns)); + + EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + } + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } } - private static void EmitCountLeadingBits(AILEmitterCtx Context, Action Emit) + public static void Clz_V(AILEmitterCtx Context) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; @@ -108,9 +120,20 @@ namespace ChocolArm64.Instruction { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); - Context.EmitLdc_I4(ESize); + if (Lzcnt.IsSupported && ESize == 32) + { + Context.Emit(OpCodes.Conv_U4); - Emit(); + Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { typeof(uint) })); + + Context.Emit(OpCodes.Conv_U8); + } + else + { + Context.EmitLdc_I4(ESize); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + } EmitVectorInsert(Context, Op.Rd, Index, Op.Size); } @@ -131,11 +154,14 @@ namespace ChocolArm64.Instruction { EmitVectorExtractZx(Context, Op.Rn, Index, 0); - Context.Emit(OpCodes.Conv_U4); - - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8)); - - Context.Emit(OpCodes.Conv_U8); + if (Popcnt.IsSupported) + { + Context.EmitCall(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { typeof(ulong) })); + } + else + { + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8)); + } EmitVectorInsert(Context, Op.Rd, Index, 0); } @@ -440,6 +466,15 @@ namespace ChocolArm64.Instruction }); } + public static void Fmls_Se(AILEmitterCtx Context) + { + EmitScalarTernaryOpByElemF(Context, () => + { + Context.Emit(OpCodes.Mul); + Context.Emit(OpCodes.Sub); + }); + } + public static void Fmls_V(AILEmitterCtx Context) { EmitVectorTernaryOpF(Context, () => @@ -554,6 +589,14 @@ namespace ChocolArm64.Instruction }); } + public static void Fmulx_Se(AILEmitterCtx Context) + { + EmitScalarBinaryOpByElemF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)); + }); + } + public static void Fmulx_V(AILEmitterCtx Context) { EmitVectorBinaryOpF(Context, () => @@ -562,6 +605,14 @@ namespace ChocolArm64.Instruction }); } + public static void Fmulx_Ve(AILEmitterCtx Context) + { + EmitVectorBinaryOpByElemF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)); + }); + } + public static void Fneg_S(AILEmitterCtx Context) { EmitScalarUnaryOpF(Context, () => Context.Emit(OpCodes.Neg)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 94097f48..6001f48c 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -249,6 +249,17 @@ namespace ChocolArm64.Instruction EmitVectorImmUnaryOp(Context, () => Context.Emit(OpCodes.Not)); } + public static void Smov_S(AILEmitterCtx Context) + { + AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; + + EmitVectorExtractSx(Context, Op.Rn, Op.DstIndex, Op.Size); + + EmitIntZeroUpperIfNeeded(Context); + + Context.EmitStintzr(Op.Rd); + } + public static void Tbl_V(AILEmitterCtx Context) { AOpCodeSimdTbl Op = (AOpCodeSimdTbl)Context.CurrOp; @@ -421,7 +432,8 @@ namespace ChocolArm64.Instruction private static void EmitIntZeroUpperIfNeeded(AILEmitterCtx Context) { - if (Context.CurrOp.RegisterSize == ARegisterSize.Int32) + if (Context.CurrOp.RegisterSize == ARegisterSize.Int32 || + Context.CurrOp.RegisterSize == ARegisterSize.SIMD64) { Context.Emit(OpCodes.Conv_U4); Context.Emit(OpCodes.Conv_U8); diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs index 127abf1d..8918c0e1 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitSimdHelper; @@ -31,12 +32,32 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - EmitVectorUnaryOpZx(Context, () => + if (AOptimizations.UseSse2 && Op.Size > 0) { + Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + Context.EmitLdc_I4(GetImmShl(Op)); - Context.Emit(OpCodes.Shl); - }); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), Types)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorUnaryOpZx(Context, () => + { + Context.EmitLdc_I4(GetImmShl(Op)); + + Context.Emit(OpCodes.Shl); + }); + } } public static void Shll_V(AILEmitterCtx Context) @@ -167,7 +188,30 @@ namespace ChocolArm64.Instruction public static void Sshr_V(AILEmitterCtx Context) { - EmitShrImmOp(Context, ShrImmFlags.VectorSx); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0 + && Op.Size < 3) + { + Type[] Types = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), Types)); + + EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitShrImmOp(Context, ShrImmFlags.VectorSx); + } } public static void Ssra_S(AILEmitterCtx Context) @@ -177,7 +221,33 @@ namespace ChocolArm64.Instruction public static void Ssra_V(AILEmitterCtx Context) { - EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0 + && Op.Size < 3) + { + Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] }; + + EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size); + EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); + + EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); + } } public static void Uqrshrn_S(AILEmitterCtx Context) @@ -239,7 +309,29 @@ namespace ChocolArm64.Instruction public static void Ushr_V(AILEmitterCtx Context) { - EmitShrImmOp(Context, ShrImmFlags.VectorZx); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0) + { + Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), Types)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitShrImmOp(Context, ShrImmFlags.VectorZx); + } } public static void Usra_S(AILEmitterCtx Context) @@ -249,7 +341,32 @@ namespace ChocolArm64.Instruction public static void Usra_V(AILEmitterCtx Context) { - EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0) + { + Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; + + EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size); + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); + } } private static void EmitVectorShl(AILEmitterCtx Context, bool Signed) diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index a7bc1085..3c5c5c4d 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -386,7 +386,7 @@ namespace ChocolArm64.Instruction #endregion #region "Count" - public static ulong CountLeadingSigns(ulong Value, int Size) + public static ulong CountLeadingSigns(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). { Value ^= Value >> 1; @@ -405,9 +405,9 @@ namespace ChocolArm64.Instruction private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; - public static ulong CountLeadingZeros(ulong Value, int Size) + public static ulong CountLeadingZeros(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). { - if (Value == 0) + if (Value == 0ul) { return (ulong)Size; } @@ -426,12 +426,17 @@ namespace ChocolArm64.Instruction return (ulong)Count; } - public static uint CountSetBits8(uint Value) + public static ulong CountSetBits8(ulong Value) // "Size" is 8 (SIMD&FP Inst.). { - Value = ((Value >> 1) & 0x55) + (Value & 0x55); - Value = ((Value >> 2) & 0x33) + (Value & 0x33); + if (Value == 0xfful) + { + return 8ul; + } + + Value = ((Value >> 1) & 0x55ul) + (Value & 0x55ul); + Value = ((Value >> 2) & 0x33ul) + (Value & 0x33ul); - return (Value >> 4) + (Value & 0x0f); + return (Value >> 4) + (Value & 0x0ful); } #endregion diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index 7412c976..2d9a9f0e 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -365,8 +365,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMaxNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -430,8 +430,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMinNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -1091,8 +1091,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMaxNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -1156,8 +1156,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMinNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { |
