diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2018-05-11 20:10:27 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-05-11 20:10:27 -0300 |
| commit | f9f111bc85a4735391a8479e9a8d36a30ae7f3a9 (patch) | |
| tree | a1b29a56dc151875a58611382b3a67050b32a95c /ChocolArm64/Instruction | |
| parent | 8e306b3ac14f93ef4e77210c2a23a219760bb55c (diff) | |
Add intrinsics support (#121)
* Initial intrinsics support
* Update tests to work with the new Vector128 type and intrinsics
* Drop SSE4.1 requirement
* Fix copy-paste mistake
Diffstat (limited to 'ChocolArm64/Instruction')
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs | 125 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdCmp.cs | 82 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdCvt.cs | 28 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdHelper.cs | 149 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdLogical.cs | 28 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdMove.cs | 24 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/ASoftFallback.cs | 315 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AVectorHelper.cs | 626 |
8 files changed, 985 insertions, 392 deletions
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 3a4b2210..0dfe0bd3 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -4,6 +4,7 @@ using ChocolArm64.Translation; using System; using System.Reflection; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitSimdHelper; @@ -41,7 +42,14 @@ namespace ChocolArm64.Instruction public static void Add_V(AILEmitterCtx Context) { - EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add)); + if (AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.Add)); + } + else + { + EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Add)); + } } public static void Addhn_V(AILEmitterCtx Context) @@ -158,7 +166,7 @@ namespace ChocolArm64.Instruction Context.Emit(OpCodes.Conv_U1); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.CountSetBits8)); Context.Emit(OpCodes.Conv_U8); @@ -303,12 +311,26 @@ namespace ChocolArm64.Instruction public static void Fadd_S(AILEmitterCtx Context) { - EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add)); + if (AOptimizations.UseSse2) + { + EmitSse2CallF(Context, nameof(Sse2.AddScalar)); + } + else + { + EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Add)); + } } public static void Fadd_V(AILEmitterCtx Context) { - EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add)); + if (AOptimizations.UseSse2) + { + EmitSse2CallF(Context, nameof(Sse2.Add)); + } + else + { + EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Add)); + } } public static void Faddp_V(AILEmitterCtx Context) @@ -345,12 +367,26 @@ namespace ChocolArm64.Instruction public static void Fdiv_S(AILEmitterCtx Context) { - EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div)); + if (AOptimizations.UseSse2) + { + 
EmitSse2CallF(Context, nameof(Sse2.DivideScalar)); + } + else + { + EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Div)); + } } public static void Fdiv_V(AILEmitterCtx Context) { - EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div)); + if (AOptimizations.UseSse2) + { + EmitSse2CallF(Context, nameof(Sse2.Divide)); + } + else + { + EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Div)); + } } public static void Fmadd_S(AILEmitterCtx Context) @@ -370,11 +406,11 @@ namespace ChocolArm64.Instruction { if (Op.Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF)); } else if (Op.Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max)); } else { @@ -391,11 +427,11 @@ namespace ChocolArm64.Instruction { if (Op.Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MaxF)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MaxF)); } else if (Op.Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Max)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Max)); } else { @@ -412,11 +448,11 @@ namespace ChocolArm64.Instruction { if (Op.Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF)); } else if (Op.Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min)); } else { @@ -435,11 +471,11 @@ namespace ChocolArm64.Instruction { if (SizeF == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.MinF)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.MinF)); } else if (SizeF == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Min)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Min)); } else { @@ -505,7 +541,14 @@ namespace ChocolArm64.Instruction public static void 
Fmul_S(AILEmitterCtx Context) { - EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul)); + if (AOptimizations.UseSse2) + { + EmitSse2CallF(Context, nameof(Sse2.MultiplyScalar)); + } + else + { + EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Mul)); + } } public static void Fmul_Se(AILEmitterCtx Context) @@ -515,7 +558,14 @@ namespace ChocolArm64.Instruction public static void Fmul_V(AILEmitterCtx Context) { - EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul)); + if (AOptimizations.UseSse2) + { + EmitSse2CallF(Context, nameof(Sse2.Multiply)); + } + else + { + EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Mul)); + } } public static void Fmul_Ve(AILEmitterCtx Context) @@ -716,11 +766,11 @@ namespace ChocolArm64.Instruction if (Op.Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF)); } else if (Op.Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round)); } else { @@ -743,11 +793,11 @@ namespace ChocolArm64.Instruction if (SizeF == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF)); } else if (SizeF == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round)); } else { @@ -819,11 +869,11 @@ namespace ChocolArm64.Instruction if (Op.Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.RoundF)); } else if (Op.Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round)); } else { @@ -844,11 +894,11 @@ namespace ChocolArm64.Instruction if (Op.Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.RoundF)); + AVectorHelper.EmitCall(Context, 
nameof(AVectorHelper.RoundF)); } else if (Op.Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.Round)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Round)); } else { @@ -947,12 +997,26 @@ namespace ChocolArm64.Instruction public static void Fsub_S(AILEmitterCtx Context) { - EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub)); + if (AOptimizations.UseSse2) + { + EmitSse2CallF(Context, nameof(Sse2.SubtractScalar)); + } + else + { + EmitScalarBinaryOpF(Context, () => Context.Emit(OpCodes.Sub)); + } } public static void Fsub_V(AILEmitterCtx Context) { - EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub)); + if (AOptimizations.UseSse2) + { + EmitSse2CallF(Context, nameof(Sse2.Subtract)); + } + else + { + EmitVectorBinaryOpF(Context, () => Context.Emit(OpCodes.Sub)); + } } public static void Mla_V(AILEmitterCtx Context) @@ -1066,7 +1130,14 @@ namespace ChocolArm64.Instruction public static void Sub_V(AILEmitterCtx Context) { - EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); + if (AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.Subtract)); + } + else + { + EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); + } } public static void Subhn_V(AILEmitterCtx Context) diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs index f155d7e8..583ad702 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitAluHelper; using static ChocolArm64.Instruction.AInstEmitSimdHelper; @@ -13,17 +14,38 @@ namespace ChocolArm64.Instruction { public static void Cmeq_V(AILEmitterCtx Context) { - EmitVectorCmp(Context, OpCodes.Beq_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + 
EmitSse2Call(Context, nameof(Sse2.CompareEqual)); + } + else + { + EmitVectorCmp(Context, OpCodes.Beq_S); + } } public static void Cmge_V(AILEmitterCtx Context) { - EmitVectorCmp(Context, OpCodes.Bge_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + EmitSse2Call(Context, nameof(Sse2.CompareGreaterThanOrEqual)); + } + else + { + EmitVectorCmp(Context, OpCodes.Bge_S); + } } public static void Cmgt_V(AILEmitterCtx Context) { - EmitVectorCmp(Context, OpCodes.Bgt_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan)); + } + else + { + EmitVectorCmp(Context, OpCodes.Bgt_S); + } } public static void Cmhi_V(AILEmitterCtx Context) @@ -112,32 +134,74 @@ namespace ChocolArm64.Instruction public static void Fcmeq_S(AILEmitterCtx Context) { - EmitScalarFcmp(Context, OpCodes.Beq_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + EmitSse2CallF(Context, nameof(Sse2.CompareEqualScalar)); + } + else + { + EmitScalarFcmp(Context, OpCodes.Beq_S); + } } public static void Fcmeq_V(AILEmitterCtx Context) { - EmitVectorFcmp(Context, OpCodes.Beq_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + EmitSse2CallF(Context, nameof(Sse2.CompareEqual)); + } + else + { + EmitVectorFcmp(Context, OpCodes.Beq_S); + } } public static void Fcmge_S(AILEmitterCtx Context) { - EmitScalarFcmp(Context, OpCodes.Bge_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqualScalar)); + } + else + { + EmitScalarFcmp(Context, OpCodes.Bge_S); + } } public static void Fcmge_V(AILEmitterCtx Context) { - EmitVectorFcmp(Context, OpCodes.Bge_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanOrEqual)); + } + else + { + EmitVectorFcmp(Context, OpCodes.Bge_S); + } } public static void Fcmgt_S(AILEmitterCtx Context) { - 
EmitScalarFcmp(Context, OpCodes.Bgt_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThanScalar)); + } + else + { + EmitScalarFcmp(Context, OpCodes.Bgt_S); + } } public static void Fcmgt_V(AILEmitterCtx Context) { - EmitVectorFcmp(Context, OpCodes.Bgt_S); + if (AOptimizations.UseSse2 && Context.CurrOp is AOpCodeSimdReg) + { + EmitSse2CallF(Context, nameof(Sse2.CompareGreaterThan)); + } + else + { + EmitVectorFcmp(Context, OpCodes.Bgt_S); + } } public static void Fcmle_S(AILEmitterCtx Context) diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs index b9944e56..444638e6 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs @@ -382,15 +382,15 @@ namespace ChocolArm64.Instruction if (SizeF == 0) { - ASoftFallback.EmitCall(Context, Signed - ? nameof(ASoftFallback.SatF32ToS32) - : nameof(ASoftFallback.SatF32ToU32)); + AVectorHelper.EmitCall(Context, Signed + ? nameof(AVectorHelper.SatF32ToS32) + : nameof(AVectorHelper.SatF32ToU32)); } else /* if (SizeF == 1) */ { - ASoftFallback.EmitCall(Context, Signed - ? nameof(ASoftFallback.SatF64ToS64) - : nameof(ASoftFallback.SatF64ToU64)); + AVectorHelper.EmitCall(Context, Signed + ? 
nameof(AVectorHelper.SatF64ToS64) + : nameof(AVectorHelper.SatF64ToU64)); } if (SizeF == 0) @@ -420,22 +420,22 @@ namespace ChocolArm64.Instruction { if (Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS32)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS32)); } else /* if (Size == 1) */ { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS32)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS32)); } } else { if (Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToS64)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToS64)); } else /* if (Size == 1) */ { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToS64)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToS64)); } } } @@ -453,22 +453,22 @@ namespace ChocolArm64.Instruction { if (Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU32)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU32)); } else /* if (Size == 1) */ { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU32)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU32)); } } else { if (Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF32ToU64)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF32ToU64)); } else /* if (Size == 1) */ { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.SatF64ToU64)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.SatF64ToU64)); } } } diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 9ef9d02f..3caf2a3e 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -3,6 +3,8 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; namespace 
ChocolArm64.Instruction { @@ -32,6 +34,129 @@ namespace ChocolArm64.Instruction return (8 << (Op.Size + 1)) - Op.Imm; } + public static void EmitSse2Call(AILEmitterCtx Context, string Name) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + void Ldvec(int Reg) + { + Context.EmitLdvec(Reg); + + switch (Op.Size) + { + case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToSByte)); break; + case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt16)); break; + case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt32)); break; + case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt64)); break; + } + } + + Ldvec(Op.Rn); + + Type BaseType = null; + + Type[] Types; + + switch (Op.Size) + { + case 0: BaseType = typeof(Vector128<sbyte>); break; + case 1: BaseType = typeof(Vector128<short>); break; + case 2: BaseType = typeof(Vector128<int>); break; + case 3: BaseType = typeof(Vector128<long>); break; + } + + if (Op is AOpCodeSimdReg BinOp) + { + Ldvec(BinOp.Rm); + + Types = new Type[] { BaseType, BaseType }; + } + else + { + Types = new Type[] { BaseType }; + } + + Context.EmitCall(typeof(Sse2).GetMethod(Name, Types)); + + switch (Op.Size) + { + case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSByteToSingle)); break; + case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt16ToSingle)); break; + case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt32ToSingle)); break; + case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt64ToSingle)); break; + } + + Context.EmitStvec(Op.Rd); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + + public static void EmitSse2CallF(AILEmitterCtx Context, string Name) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + void Ldvec(int Reg) + { + Context.EmitLdvec(Reg); + + if 
(SizeF == 1) + { + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToDouble)); + } + } + + Ldvec(Op.Rn); + + Type BaseType = SizeF == 0 + ? typeof(Vector128<float>) + : typeof(Vector128<double>); + + Type[] Types; + + if (Op is AOpCodeSimdReg BinOp) + { + Ldvec(BinOp.Rm); + + Types = new Type[] { BaseType, BaseType }; + } + else + { + Types = new Type[] { BaseType }; + } + + MethodInfo MthdInfo; + + if (SizeF == 0) + { + MthdInfo = typeof(Sse).GetMethod(Name, Types); + } + else /* if (SizeF == 1) */ + { + MthdInfo = typeof(Sse2).GetMethod(Name, Types); + } + + Context.EmitCall(MthdInfo); + + if (SizeF == 1) + { + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle)); + } + + Context.EmitStvec(Op.Rd); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + public static void EmitUnaryMathCall(AILEmitterCtx Context, string Name) { IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp; @@ -596,9 +721,9 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I4(Index); Context.EmitLdc_I4(Size); - ASoftFallback.EmitCall(Context, Signed - ? nameof(ASoftFallback.VectorExtractIntSx) - : nameof(ASoftFallback.VectorExtractIntZx)); + AVectorHelper.EmitCall(Context, Signed + ? 
nameof(AVectorHelper.VectorExtractIntSx) + : nameof(AVectorHelper.VectorExtractIntZx)); } public static void EmitVectorExtractF(AILEmitterCtx Context, int Reg, int Index, int Size) @@ -610,11 +735,11 @@ namespace ChocolArm64.Instruction if (Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractSingle)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractSingle)); } else if (Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorExtractDouble)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorExtractDouble)); } else { @@ -646,7 +771,7 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I4(Index); Context.EmitLdc_I4(Size); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt)); Context.EmitStvec(Reg); } @@ -659,7 +784,7 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I4(Index); Context.EmitLdc_I4(Size); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt)); Context.EmitStvectmp(); } @@ -673,7 +798,7 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I4(Index); Context.EmitLdc_I4(Size); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertInt)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt)); Context.EmitStvec(Reg); } @@ -687,11 +812,11 @@ namespace ChocolArm64.Instruction if (Size == 0) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertSingle)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle)); } else if (Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble)); } else { @@ -710,11 +835,11 @@ namespace ChocolArm64.Instruction if (Size == 0) { - ASoftFallback.EmitCall(Context, 
nameof(ASoftFallback.VectorInsertSingle)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertSingle)); } else if (Size == 1) { - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.VectorInsertDouble)); + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertDouble)); } else { diff --git a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs index 25aa873b..9c897bfe 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs @@ -2,6 +2,7 @@ using ChocolArm64.Decoder; using ChocolArm64.State; using ChocolArm64.Translation; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitSimdHelper; @@ -11,7 +12,14 @@ namespace ChocolArm64.Instruction { public static void And_V(AILEmitterCtx Context) { - EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.And)); + if (AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.And)); + } + else + { + EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.And)); + } } public static void Bic_V(AILEmitterCtx Context) @@ -95,7 +103,14 @@ namespace ChocolArm64.Instruction public static void Eor_V(AILEmitterCtx Context) { - EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Xor)); + if (AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.Xor)); + } + else + { + EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Xor)); + } } public static void Not_V(AILEmitterCtx Context) @@ -114,7 +129,14 @@ namespace ChocolArm64.Instruction public static void Orr_V(AILEmitterCtx Context) { - EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Or)); + if (AOptimizations.UseSse2) + { + EmitSse2Call(Context, nameof(Sse2.Or)); + } + else + { + EmitVectorBinaryOpZx(Context, () => Context.Emit(OpCodes.Or)); + } } public static void Orr_Vi(AILEmitterCtx Context) diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs 
b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 20268d58..95fe5949 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -234,21 +234,21 @@ namespace ChocolArm64.Instruction switch (Op.Size) { - case 1: ASoftFallback.EmitCall(Context, - nameof(ASoftFallback.Tbl1_V64), - nameof(ASoftFallback.Tbl1_V128)); break; + case 1: AVectorHelper.EmitCall(Context, + nameof(AVectorHelper.Tbl1_V64), + nameof(AVectorHelper.Tbl1_V128)); break; - case 2: ASoftFallback.EmitCall(Context, - nameof(ASoftFallback.Tbl2_V64), - nameof(ASoftFallback.Tbl2_V128)); break; + case 2: AVectorHelper.EmitCall(Context, + nameof(AVectorHelper.Tbl2_V64), + nameof(AVectorHelper.Tbl2_V128)); break; - case 3: ASoftFallback.EmitCall(Context, - nameof(ASoftFallback.Tbl3_V64), - nameof(ASoftFallback.Tbl3_V128)); break; + case 3: AVectorHelper.EmitCall(Context, + nameof(AVectorHelper.Tbl3_V64), + nameof(AVectorHelper.Tbl3_V128)); break; - case 4: ASoftFallback.EmitCall(Context, - nameof(ASoftFallback.Tbl4_V64), - nameof(ASoftFallback.Tbl4_V128)); break; + case 4: AVectorHelper.EmitCall(Context, + nameof(AVectorHelper.Tbl4_V64), + nameof(AVectorHelper.Tbl4_V128)); break; default: throw new InvalidOperationException(); } diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index 497605a4..8ed55e20 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -1,20 +1,11 @@ -using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Numerics; -using System.Runtime.CompilerServices; namespace ChocolArm64.Instruction { static class ASoftFallback { - public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128) - { - bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64; - - Context.EmitCall(typeof(ASoftFallback), IsSimd64 ? 
Name64 : Name128); - } - public static void EmitCall(AILEmitterCtx Context, string MthdName) { Context.EmitCall(typeof(ASoftFallback), MthdName); @@ -160,78 +151,6 @@ namespace ChocolArm64.Instruction throw new ArgumentException(nameof(Size)); } - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int SatF32ToS32(float Value) - { - if (float.IsNaN(Value)) return 0; - - return Value > int.MaxValue ? int.MaxValue : - Value < int.MinValue ? int.MinValue : (int)Value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static long SatF32ToS64(float Value) - { - if (float.IsNaN(Value)) return 0; - - return Value > long.MaxValue ? long.MaxValue : - Value < long.MinValue ? long.MinValue : (long)Value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static uint SatF32ToU32(float Value) - { - if (float.IsNaN(Value)) return 0; - - return Value > uint.MaxValue ? uint.MaxValue : - Value < uint.MinValue ? uint.MinValue : (uint)Value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ulong SatF32ToU64(float Value) - { - if (float.IsNaN(Value)) return 0; - - return Value > ulong.MaxValue ? ulong.MaxValue : - Value < ulong.MinValue ? ulong.MinValue : (ulong)Value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static int SatF64ToS32(double Value) - { - if (double.IsNaN(Value)) return 0; - - return Value > int.MaxValue ? int.MaxValue : - Value < int.MinValue ? int.MinValue : (int)Value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static long SatF64ToS64(double Value) - { - if (double.IsNaN(Value)) return 0; - - return Value > long.MaxValue ? long.MaxValue : - Value < long.MinValue ? long.MinValue : (long)Value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static uint SatF64ToU32(double Value) - { - if (double.IsNaN(Value)) return 0; - - return Value > uint.MaxValue ? uint.MaxValue : - Value < uint.MinValue ? 
uint.MinValue : (uint)Value; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static ulong SatF64ToU64(double Value) - { - if (double.IsNaN(Value)) return 0; - - return Value > ulong.MaxValue ? ulong.MaxValue : - Value < ulong.MinValue ? ulong.MinValue : (ulong)Value; - } - public static long SMulHi128(long LHS, long RHS) { return (long)(BigInteger.Multiply(LHS, RHS) >> 64); @@ -241,239 +160,5 @@ namespace ChocolArm64.Instruction { return (ulong)(BigInteger.Multiply(LHS, RHS) >> 64); } - - public static int CountSetBits8(byte Value) - { - return ((Value >> 0) & 1) + ((Value >> 1) & 1) + - ((Value >> 2) & 1) + ((Value >> 3) & 1) + - ((Value >> 4) & 1) + ((Value >> 5) & 1) + - ((Value >> 6) & 1) + (Value >> 7); - } - - public static float MaxF(float val1, float val2) - { - if (val1 == 0.0 && val2 == 0.0) - { - if (BitConverter.SingleToInt32Bits(val1) < 0 && BitConverter.SingleToInt32Bits(val2) < 0) - return -0.0f; - - return 0.0f; - } - - if (val1 > val2) - return val1; - - if (float.IsNaN(val1)) - return val1; - - return val2; - } - - public static double Max(double val1, double val2) - { - if (val1 == 0.0 && val2 == 0.0) - { - if (BitConverter.DoubleToInt64Bits(val1) < 0 && BitConverter.DoubleToInt64Bits(val2) < 0) - return -0.0; - - return 0.0; - } - - if (val1 > val2) - return val1; - - if (double.IsNaN(val1)) - return val1; - - return val2; - } - - public static float MinF(float val1, float val2) - { - if (val1 == 0.0 && val2 == 0.0) - { - if (BitConverter.SingleToInt32Bits(val1) < 0 || BitConverter.SingleToInt32Bits(val2) < 0) - return -0.0f; - - return 0.0f; - } - - if (val1 < val2) - return val1; - - if (float.IsNaN(val1)) - return val1; - - return val2; - } - - public static double Min(double val1, double val2) - { - if (val1 == 0.0 && val2 == 0.0) - { - if (BitConverter.DoubleToInt64Bits(val1) < 0 || BitConverter.DoubleToInt64Bits(val2) < 0) - return -0.0; - - return 0.0; - } - - if (val1 < val2) - return val1; - - if (double.IsNaN(val1)) 
- return val1; - - return val2; - } - - public static float RoundF(float Value, int Fpcr) - { - switch ((ARoundMode)((Fpcr >> 22) & 3)) - { - case ARoundMode.ToNearest: return MathF.Round (Value); - case ARoundMode.TowardsPlusInfinity: return MathF.Ceiling (Value); - case ARoundMode.TowardsMinusInfinity: return MathF.Floor (Value); - case ARoundMode.TowardsZero: return MathF.Truncate(Value); - } - - throw new InvalidOperationException(); - } - - public static double Round(double Value, int Fpcr) - { - switch ((ARoundMode)((Fpcr >> 22) & 3)) - { - case ARoundMode.ToNearest: return Math.Round (Value); - case ARoundMode.TowardsPlusInfinity: return Math.Ceiling (Value); - case ARoundMode.TowardsMinusInfinity: return Math.Floor (Value); - case ARoundMode.TowardsZero: return Math.Truncate(Value); - } - - throw new InvalidOperationException(); - } - - public static AVec Tbl1_V64(AVec Vector, AVec Tb0) - { - return Tbl(Vector, 8, Tb0); - } - - public static AVec Tbl1_V128(AVec Vector, AVec Tb0) - { - return Tbl(Vector, 16, Tb0); - } - - public static AVec Tbl2_V64(AVec Vector, AVec Tb0, AVec Tb1) - { - return Tbl(Vector, 8, Tb0, Tb1); - } - - public static AVec Tbl2_V128(AVec Vector, AVec Tb0, AVec Tb1) - { - return Tbl(Vector, 16, Tb0, Tb1); - } - - public static AVec Tbl3_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2) - { - return Tbl(Vector, 8, Tb0, Tb1, Tb2); - } - - public static AVec Tbl3_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2) - { - return Tbl(Vector, 16, Tb0, Tb1, Tb2); - } - - public static AVec Tbl4_V64(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3) - { - return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3); - } - - public static AVec Tbl4_V128(AVec Vector, AVec Tb0, AVec Tb1, AVec Tb2, AVec Tb3) - { - return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3); - } - - private static AVec Tbl(AVec Vector, int Bytes, params AVec[] Tb) - { - AVec Res = new AVec(); - - byte[] Table = new byte[Tb.Length * 16]; - - for (int Index = 0; Index < Tb.Length; Index++) - for (int Index2 
= 0; Index2 < 16; Index2++) - { - Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0); - } - - for (int Index = 0; Index < Bytes; Index++) - { - byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0); - - if (TblIdx < Table.Length) - { - Res = VectorInsertInt(Table[TblIdx], Res, Index, 0); - } - } - - return Res; - } - - public static ulong VectorExtractIntZx(AVec Vector, int Index, int Size) - { - switch (Size) - { - case 0: return Vector.ExtractByte (Index); - case 1: return Vector.ExtractUInt16(Index); - case 2: return Vector.ExtractUInt32(Index); - case 3: return Vector.ExtractUInt64(Index); - } - - throw new ArgumentOutOfRangeException(nameof(Size)); - } - - public static long VectorExtractIntSx(AVec Vector, int Index, int Size) - { - switch (Size) - { - case 0: return (sbyte)Vector.ExtractByte (Index); - case 1: return (short)Vector.ExtractUInt16(Index); - case 2: return (int)Vector.ExtractUInt32(Index); - case 3: return (long)Vector.ExtractUInt64(Index); - } - - throw new ArgumentOutOfRangeException(nameof(Size)); - } - - public static float VectorExtractSingle(AVec Vector, int Index) - { - return Vector.ExtractSingle(Index); - } - - public static double VectorExtractDouble(AVec Vector, int Index) - { - return Vector.ExtractDouble(Index); - } - - public static AVec VectorInsertSingle(float Value, AVec Vector, int Index) - { - return AVec.InsertSingle(Vector, Index, Value); - } - - public static AVec VectorInsertDouble(double Value, AVec Vector, int Index) - { - return AVec.InsertDouble(Vector, Index, Value); - } - - public static AVec VectorInsertInt(ulong Value, AVec Vector, int Index, int Size) - { - switch (Size) - { - case 0: return AVec.InsertByte (Vector, Index, (byte)Value); - case 1: return AVec.InsertUInt16(Vector, Index, (ushort)Value); - case 2: return AVec.InsertUInt32(Vector, Index, (uint)Value); - case 3: return AVec.InsertUInt64(Vector, Index, (ulong)Value); - } - - throw new ArgumentOutOfRangeException(nameof(Size)); 
using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

namespace ChocolArm64.Instruction
{
    /// <summary>
    /// Scalar and <c>Vector128</c> helper routines (saturating conversions, min/max,
    /// rounding, table lookup, element extract/insert and vector re-typing) together
    /// with the <c>EmitCall</c> overloads used to emit calls to them.
    /// </summary>
    static class AVectorHelper
    {
        /// <summary>
        /// Emits a call to the helper named <paramref name="Name64"/> when the current
        /// op's register size is <c>SIMD64</c>, otherwise to <paramref name="Name128"/>.
        /// </summary>
        public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
        {
            bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;

            Context.EmitCall(typeof(AVectorHelper), IsSimd64 ? Name64 : Name128);
        }

        /// <summary>Emits a call to the helper of this class named <paramref name="MthdName"/>.</summary>
        public static void EmitCall(AILEmitterCtx Context, string MthdName)
        {
            Context.EmitCall(typeof(AVectorHelper), MthdName);
        }

        // Saturating float -> integer conversions.
        //
        // All of them return 0 for NaN and clamp to the target type's range. The
        // comparisons are inclusive (>= / <=) on purpose: int/uint/long/ulong.MaxValue
        // are rounded UP to the next power of two when converted to float (and the
        // 64-bit ones when converted to double), so a strict ">" would let a value
        // equal to that power of two fall through to an out-of-range cast.

        /// <summary>Converts <paramref name="Value"/> to <c>int</c>, saturating; NaN yields 0.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int SatF32ToS32(float Value)
        {
            if (float.IsNaN(Value)) return 0;

            return Value >= int.MaxValue ? int.MaxValue :
                   Value <= int.MinValue ? int.MinValue : (int)Value;
        }

        /// <summary>Converts <paramref name="Value"/> to <c>long</c>, saturating; NaN yields 0.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static long SatF32ToS64(float Value)
        {
            if (float.IsNaN(Value)) return 0;

            return Value >= long.MaxValue ? long.MaxValue :
                   Value <= long.MinValue ? long.MinValue : (long)Value;
        }

        /// <summary>Converts <paramref name="Value"/> to <c>uint</c>, saturating; NaN yields 0.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static uint SatF32ToU32(float Value)
        {
            if (float.IsNaN(Value)) return 0;

            return Value >= uint.MaxValue ? uint.MaxValue :
                   Value <= uint.MinValue ? uint.MinValue : (uint)Value;
        }

        /// <summary>Converts <paramref name="Value"/> to <c>ulong</c>, saturating; NaN yields 0.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static ulong SatF32ToU64(float Value)
        {
            if (float.IsNaN(Value)) return 0;

            return Value >= ulong.MaxValue ? ulong.MaxValue :
                   Value <= ulong.MinValue ? ulong.MinValue : (ulong)Value;
        }

        /// <summary>Converts <paramref name="Value"/> to <c>int</c>, saturating; NaN yields 0.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static int SatF64ToS32(double Value)
        {
            if (double.IsNaN(Value)) return 0;

            return Value >= int.MaxValue ? int.MaxValue :
                   Value <= int.MinValue ? int.MinValue : (int)Value;
        }

        /// <summary>Converts <paramref name="Value"/> to <c>long</c>, saturating; NaN yields 0.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static long SatF64ToS64(double Value)
        {
            if (double.IsNaN(Value)) return 0;

            return Value >= long.MaxValue ? long.MaxValue :
                   Value <= long.MinValue ? long.MinValue : (long)Value;
        }

        /// <summary>Converts <paramref name="Value"/> to <c>uint</c>, saturating; NaN yields 0.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static uint SatF64ToU32(double Value)
        {
            if (double.IsNaN(Value)) return 0;

            return Value >= uint.MaxValue ? uint.MaxValue :
                   Value <= uint.MinValue ? uint.MinValue : (uint)Value;
        }

        /// <summary>Converts <paramref name="Value"/> to <c>ulong</c>, saturating; NaN yields 0.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static ulong SatF64ToU64(double Value)
        {
            if (double.IsNaN(Value)) return 0;

            return Value >= ulong.MaxValue ? ulong.MaxValue :
                   Value <= ulong.MinValue ? ulong.MinValue : (ulong)Value;
        }

        /// <summary>Returns the number of bits set in <paramref name="Value"/> (0 to 8).</summary>
        public static int CountSetBits8(byte Value)
        {
            return ((Value >> 0) & 1) + ((Value >> 1) & 1) +
                   ((Value >> 2) & 1) + ((Value >> 3) & 1) +
                   ((Value >> 4) & 1) + ((Value >> 5) & 1) +
                   ((Value >> 6) & 1) +  (Value >> 7);
        }

        /// <summary>
        /// Returns the larger operand. If either operand is NaN, a NaN is returned.
        /// When both operands are zero the result is -0.0 only if both are negative zero.
        /// </summary>
        public static double Max(double LHS, double RHS)
        {
            if (LHS == 0.0 && RHS == 0.0)
            {
                // +0.0 == -0.0 compares equal, so the sign bit is inspected directly.
                if (BitConverter.DoubleToInt64Bits(LHS) < 0 &&
                    BitConverter.DoubleToInt64Bits(RHS) < 0)
                    return -0.0;

                return 0.0;
            }

            if (LHS > RHS)
                return LHS;

            if (double.IsNaN(LHS))
                return LHS;

            // Also reached when RHS is NaN (the comparison above is false), so NaN propagates.
            return RHS;
        }

        /// <summary>Single-precision counterpart of <see cref="Max(double, double)"/>.</summary>
        public static float MaxF(float LHS, float RHS)
        {
            if (LHS == 0.0 && RHS == 0.0)
            {
                if (BitConverter.SingleToInt32Bits(LHS) < 0 &&
                    BitConverter.SingleToInt32Bits(RHS) < 0)
                    return -0.0f;

                return 0.0f;
            }

            if (LHS > RHS)
                return LHS;

            if (float.IsNaN(LHS))
                return LHS;

            return RHS;
        }

        /// <summary>
        /// Returns the smaller operand. If either operand is NaN, a NaN is returned.
        /// When both operands are zero the result is -0.0 if at least one is negative zero.
        /// </summary>
        public static double Min(double LHS, double RHS)
        {
            if (LHS == 0.0 && RHS == 0.0)
            {
                if (BitConverter.DoubleToInt64Bits(LHS) < 0 ||
                    BitConverter.DoubleToInt64Bits(RHS) < 0)
                    return -0.0;

                return 0.0;
            }

            if (LHS < RHS)
                return LHS;

            if (double.IsNaN(LHS))
                return LHS;

            return RHS;
        }

        /// <summary>Single-precision counterpart of <see cref="Min(double, double)"/>.</summary>
        public static float MinF(float LHS, float RHS)
        {
            if (LHS == 0.0 && RHS == 0.0)
            {
                if (BitConverter.SingleToInt32Bits(LHS) < 0 ||
                    BitConverter.SingleToInt32Bits(RHS) < 0)
                    return -0.0f;

                return 0.0f;
            }

            if (LHS < RHS)
                return LHS;

            if (float.IsNaN(LHS))
                return LHS;

            return RHS;
        }

        /// <summary>
        /// Rounds <paramref name="Value"/> to an integral value using the rounding mode
        /// encoded in bits 22-23 of <paramref name="Fpcr"/>.
        /// </summary>
        public static double Round(double Value, int Fpcr)
        {
            switch ((ARoundMode)((Fpcr >> 22) & 3))
            {
                case ARoundMode.ToNearest:            return Math.Round   (Value);
                case ARoundMode.TowardsPlusInfinity:  return Math.Ceiling (Value);
                case ARoundMode.TowardsMinusInfinity: return Math.Floor   (Value);
                case ARoundMode.TowardsZero:          return Math.Truncate(Value);
            }

            throw new InvalidOperationException();
        }

        /// <summary>Single-precision counterpart of <see cref="Round(double, int)"/>.</summary>
        public static float RoundF(float Value, int Fpcr)
        {
            switch ((ARoundMode)((Fpcr >> 22) & 3))
            {
                case ARoundMode.ToNearest:            return MathF.Round   (Value);
                case ARoundMode.TowardsPlusInfinity:  return MathF.Ceiling (Value);
                case ARoundMode.TowardsMinusInfinity: return MathF.Floor   (Value);
                case ARoundMode.TowardsZero:          return MathF.Truncate(Value);
            }

            throw new InvalidOperationException();
        }

        // TblN_V64 / TblN_V128: table lookup over N table vectors, producing
        // 8 (V64) or 16 (V128) result bytes. See Tbl below.

        public static Vector128<float> Tbl1_V64(
            Vector128<float> Vector,
            Vector128<float> Tb0)
        {
            return Tbl(Vector, 8, Tb0);
        }

        public static Vector128<float> Tbl1_V128(
            Vector128<float> Vector,
            Vector128<float> Tb0)
        {
            return Tbl(Vector, 16, Tb0);
        }

        public static Vector128<float> Tbl2_V64(
            Vector128<float> Vector,
            Vector128<float> Tb0,
            Vector128<float> Tb1)
        {
            return Tbl(Vector, 8, Tb0, Tb1);
        }

        public static Vector128<float> Tbl2_V128(
            Vector128<float> Vector,
            Vector128<float> Tb0,
            Vector128<float> Tb1)
        {
            return Tbl(Vector, 16, Tb0, Tb1);
        }

        public static Vector128<float> Tbl3_V64(
            Vector128<float> Vector,
            Vector128<float> Tb0,
            Vector128<float> Tb1,
            Vector128<float> Tb2)
        {
            return Tbl(Vector, 8, Tb0, Tb1, Tb2);
        }

        public static Vector128<float> Tbl3_V128(
            Vector128<float> Vector,
            Vector128<float> Tb0,
            Vector128<float> Tb1,
            Vector128<float> Tb2)
        {
            return Tbl(Vector, 16, Tb0, Tb1, Tb2);
        }

        public static Vector128<float> Tbl4_V64(
            Vector128<float> Vector,
            Vector128<float> Tb0,
            Vector128<float> Tb1,
            Vector128<float> Tb2,
            Vector128<float> Tb3)
        {
            return Tbl(Vector, 8, Tb0, Tb1, Tb2, Tb3);
        }

        public static Vector128<float> Tbl4_V128(
            Vector128<float> Vector,
            Vector128<float> Tb0,
            Vector128<float> Tb1,
            Vector128<float> Tb2,
            Vector128<float> Tb3)
        {
            return Tbl(Vector, 16, Tb0, Tb1, Tb2, Tb3);
        }

        /// <summary>
        /// Byte table lookup. The bytes of all <paramref name="Tb"/> vectors are
        /// concatenated into one table; each of the first <paramref name="Bytes"/> bytes
        /// of <paramref name="Vector"/> is used as an index into it. Indices past the end
        /// of the table, and result bytes beyond <paramref name="Bytes"/>, are left as zero.
        /// </summary>
        private static Vector128<float> Tbl(Vector128<float> Vector, int Bytes, params Vector128<float>[] Tb)
        {
            Vector128<float> Res = new Vector128<float>();

            byte[] Table = new byte[Tb.Length * 16];

            for (byte Index  = 0; Index  < Tb.Length; Index++)
            for (byte Index2 = 0; Index2 < 16;        Index2++)
            {
                Table[Index * 16 + Index2] = (byte)VectorExtractIntZx(Tb[Index], Index2, 0);
            }

            for (byte Index = 0; Index < Bytes; Index++)
            {
                byte TblIdx = (byte)VectorExtractIntZx(Vector, Index, 0);

                if (TblIdx < Table.Length)
                {
                    Res = VectorInsertInt(Table[TblIdx], Res, Index, 0);
                }
            }

            return Res;
        }

        /// <summary>Returns the 64-bit element <paramref name="Index"/> reinterpreted as a <c>double</c>.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static double VectorExtractDouble(Vector128<float> Vector, byte Index)
        {
            return BitConverter.Int64BitsToDouble(VectorExtractIntSx(Vector, Index, 3));
        }

        /// <summary>
        /// Extracts element <paramref name="Index"/> of width 8 &lt;&lt; <paramref name="Size"/>
        /// bits (Size 0..3) and sign-extends it to 64 bits.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="Size"/> is not 0..3.</exception>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static long VectorExtractIntSx(Vector128<float> Vector, byte Index, int Size)
        {
            if (Sse41.IsSupported)
            {
                switch (Size)
                {
                    case 0:
                        return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);

                    case 1:
                        return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);

                    case 2:
                        return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);

                    case 3:
                        return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
                }

                throw new ArgumentOutOfRangeException(nameof(Size));
            }
            else if (Sse2.IsSupported)
            {
                // Reuse the SSE2 zero-extending path and reinterpret at the element width.
                switch (Size)
                {
                    case 0:
                        return (sbyte)VectorExtractIntZx(Vector, Index, Size);

                    case 1:
                        return (short)VectorExtractIntZx(Vector, Index, Size);

                    case 2:
                        return (int)VectorExtractIntZx(Vector, Index, Size);

                    case 3:
                        return (long)VectorExtractIntZx(Vector, Index, Size);
                }

                throw new ArgumentOutOfRangeException(nameof(Size));
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>
        /// Extracts element <paramref name="Index"/> of width 8 &lt;&lt; <paramref name="Size"/>
        /// bits (Size 0..3) and zero-extends it to 64 bits.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="Size"/> is not 0..3.</exception>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static ulong VectorExtractIntZx(Vector128<float> Vector, byte Index, int Size)
        {
            if (Sse41.IsSupported)
            {
                switch (Size)
                {
                    case 0:
                        return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);

                    case 1:
                        return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);

                    case 2:
                        return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);

                    case 3:
                        return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
                }

                throw new ArgumentOutOfRangeException(nameof(Size));
            }
            else if (Sse2.IsSupported)
            {
                // SSE2 can only extract 16-bit lanes, so every element is assembled
                // from one or more 16-bit pieces. ShortIdx is the first lane involved.
                Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector);

                int ShortIdx = Size == 0
                    ? Index >> 1
                    : Index << (Size - 1);

                ushort Value = Sse2.Extract(ShortVector, (byte)ShortIdx);

                switch (Size)
                {
                    case 0:
                        // Odd byte indices live in the high half of the 16-bit lane.
                        return (byte)(Value >> (Index & 1) * 8);

                    case 1:
                        return Value;

                    case 2:
                    case 3:
                    {
                        ushort Value1 = Sse2.Extract(ShortVector, (byte)(ShortIdx + 1));

                        if (Size == 2)
                        {
                            return (uint)(Value | (Value1 << 16));
                        }

                        ushort Value2 = Sse2.Extract(ShortVector, (byte)(ShortIdx + 2));
                        ushort Value3 = Sse2.Extract(ShortVector, (byte)(ShortIdx + 3));

                        return ((ulong)Value  <<  0) |
                               ((ulong)Value1 << 16) |
                               ((ulong)Value2 << 32) |
                               ((ulong)Value3 << 48);
                    }
                }

                throw new ArgumentOutOfRangeException(nameof(Size));
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>Returns the 32-bit element <paramref name="Index"/> as a <c>float</c>.</summary>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static float VectorExtractSingle(Vector128<float> Vector, byte Index)
        {
            if (Sse41.IsSupported)
            {
                return Sse41.Extract(Vector, Index);
            }

            // No SSE4.1: read the raw 32-bit lane through the integer helper (which has
            // an SSE2 path and throws PlatformNotSupportedException otherwise).
            return BitConverter.Int32BitsToSingle((int)VectorExtractIntZx(Vector, Index, 2));
        }

        /// <summary>Returns <paramref name="Vector"/> with 64-bit element <paramref name="Index"/> replaced by the bits of <paramref name="Value"/>.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertDouble(double Value, Vector128<float> Vector, byte Index)
        {
            return VectorInsertInt((ulong)BitConverter.DoubleToInt64Bits(Value), Vector, Index, 3);
        }

        /// <summary>
        /// Returns <paramref name="Vector"/> with element <paramref name="Index"/> of width
        /// 8 &lt;&lt; <paramref name="Size"/> bits (Size 0..3) replaced by the low bits of
        /// <paramref name="Value"/>.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="Size"/> is not 0..3.</exception>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertInt(ulong Value, Vector128<float> Vector, byte Index, int Size)
        {
            if (Sse41.IsSupported)
            {
                switch (Size)
                {
                    case 0:
                        return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));

                    case 1:
                        return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));

                    case 2:
                        return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));

                    case 3:
                        return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
                }

                throw new ArgumentOutOfRangeException(nameof(Size));
            }
            else if (Sse2.IsSupported)
            {
                // SSE2 can only insert 16-bit lanes; wider elements are written piecewise
                // and bytes are merged into their containing 16-bit lane.
                Vector128<ushort> ShortVector = Sse.StaticCast<float, ushort>(Vector);

                int ShortIdx = Size == 0
                    ? Index >> 1
                    : Index << (Size - 1);

                switch (Size)
                {
                    case 0:
                    {
                        ushort ShortVal = Sse2.Extract(ShortVector, (byte)ShortIdx);

                        int Shift = (Index & 1) * 8;

                        // Keep the other byte of the lane: mask is 0xff00 for an even
                        // index (Shift 0) and 0x00ff for an odd index (Shift 8).
                        ShortVal &= (ushort)(0xff00 >> Shift);

                        ShortVal |= (ushort)((byte)Value << Shift);

                        return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
                    }

                    case 1:
                        return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, (ushort)Value, Index));

                    case 2:
                    case 3:
                    {
                        ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >>  0), (byte)(ShortIdx + 0));
                        ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));

                        if (Size == 3)
                        {
                            ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
                            ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
                        }

                        return Sse.StaticCast<ushort, float>(ShortVector);
                    }
                }

                throw new ArgumentOutOfRangeException(nameof(Size));
            }

            throw new PlatformNotSupportedException();
        }

        /// <summary>Returns <paramref name="Vector"/> with 32-bit element <paramref name="Index"/> replaced by <paramref name="Value"/>.</summary>
        /// <exception cref="PlatformNotSupportedException">Neither SSE4.1 nor SSE2 is available.</exception>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInsertSingle(float Value, Vector128<float> Vector, byte Index)
        {
            if (Sse41.IsSupported)
            {
                // The destination lane is selected by bits 4-5 of the immediate.
                return Sse41.Insert(Vector, Value, (byte)(Index << 4));
            }

            // No SSE4.1: write the raw 32-bit pattern through the integer helper (which
            // has an SSE2 path and throws PlatformNotSupportedException otherwise).
            return VectorInsertInt((uint)BitConverter.SingleToInt32Bits(Value), Vector, Index, 2);
        }

        // Vector re-typing helpers: each one forwards to Sse.StaticCast to view the
        // same Vector128 as a different element type. They throw
        // PlatformNotSupportedException when SSE is not available.

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<float, sbyte>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<short> VectorSingleToInt16(Vector128<float> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<float, short>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<int> VectorSingleToInt32(Vector128<float> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<float, int>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<long> VectorSingleToInt64(Vector128<float> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<float, long>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<float, double>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorSByteToSingle(Vector128<sbyte> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<sbyte, float>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInt16ToSingle(Vector128<short> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<short, float>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInt32ToSingle(Vector128<int> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<int, float>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorInt64ToSingle(Vector128<long> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<long, float>(Vector);
            }

            throw new PlatformNotSupportedException();
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector)
        {
            if (Sse.IsSupported)
            {
                return Sse.StaticCast<double, float>(Vector);
            }

            throw new PlatformNotSupportedException();
        }
    }
}
\ No newline at end of file |
