From 063fae50fe25388d10e9ec1915c561dc0f4d519d Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 15 Jul 2018 05:53:26 +0200 Subject: Fix EmitHighNarrow(), EmitSaturatingNarrowOp() when Rd == Rn || Rd == Rm (& Part != 0). Optimization of EmitVectorTranspose(), EmitVectorUnzip(), EmitVectorZip() algorithms (reduction of the number of operations and their complexity). Add 12 Tests about Trn1/2, Uzp1/2, Zip1/2 (V) instructions. (#268) * Update CpuTestSimdArithmetic.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Update Instructions.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdHelper.cs * Update AInstEmitSimdMove.cs * Delete CpuTestSimdMove.cs --- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 47 +++++++++++++++------------- 1 file changed, 25 insertions(+), 22 deletions(-) (limited to 'ChocolArm64/Instruction/AInstEmitSimdMove.cs') diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 739f01c6..592cab73 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -331,17 +331,18 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; - - int Elems = Bytes >> Op.Size; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = (Index & ~1) + Part; + int Idx = Index << 1; - EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Idx + 1, Op.Size); + EmitVectorInsertTmp(Context, Idx , Op.Size); } Context.EmitLdvectmp(); @@ -357,18 +358,18 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - int Half = Elems >> 1; - - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = Part + ((Index & (Half - 1)) << 1); + int Idx = Index << 1; - EmitVectorExtractZx(Context, Index < Half ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Idx + Part, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Pairs + Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } Context.EmitLdvectmp(); @@ -384,18 +385,20 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - int Half = Elems >> 1; + int Base = Part != 0 ? Pairs : 0; - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = Part * Half + (Index >> 1); + int Idx = Index << 1; - EmitVectorExtractZx(Context, (Index & 1) == 0 ? Op.Rn : Op.Rm, Elem, Op.Size); + EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size); + EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorInsertTmp(Context, Idx + 1, Op.Size); + EmitVectorInsertTmp(Context, Idx , Op.Size); } Context.EmitLdvectmp(); -- cgit v1.2.3