From 8bf102d2cd744f56e2a4839fa0391acda3e201b8 Mon Sep 17 00:00:00 2001 From: Ac_K Date: Tue, 30 Jan 2024 00:51:05 +0100 Subject: Cpu: Implement Vpadal and Vrintr instructions (#6185) * Cpu: Implement Vpadal and Vrintr instructions This PR supersedes the last instructions left in #2242. Since I'm not a CPU guy I've just ported the code and nothing more. Please be precise during review if there are some changes to be done. It should fix #1781 Co-Authored-By: Piyachet Kanda * Addresses gdkchan's feedback * Addresses gdkchan's feedback 2 * Apply suggestions from code review Co-authored-by: gdkchan * another fix * Update InstEmitSimdHelper32.cs * Correct fix * Addresses gdkchan's feedback * Update CpuTestSimdCvt32.cs --------- Co-authored-by: Piyachet Kanda Co-authored-by: gdkchan --- src/ARMeilleure/Decoders/OpCodeTable.cs | 2 ++ .../Instructions/InstEmitSimdArithmetic32.cs | 7 ++++++ src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs | 16 ++++++++++++ .../Instructions/InstEmitSimdHelper32.cs | 29 ++++++++++++++++++++++ src/ARMeilleure/Instructions/InstName.cs | 2 ++ 5 files changed, 56 insertions(+) (limited to 'src/ARMeilleure') diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs index 528cef1b..edc00412 100644 --- a/src/ARMeilleure/Decoders/OpCodeTable.cs +++ b/src/ARMeilleure/Decoders/OpCodeTable.cs @@ -875,6 +875,7 @@ namespace ARMeilleure.Decoders SetVfp("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); SetVfp("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11101x110110xxxx101x01x0xxxx", InstName.Vrintr, InstEmit32.Vrintr_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("<<<<11101x110111xxxx101x01x0xxxx", InstName.Vrintx, 
InstEmit32.Vrintx_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); SetVfp("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, OpCode32SimdSel.Create, OpCode32SimdSel.CreateT32); @@ -995,6 +996,7 @@ namespace ARMeilleure.Decoders SetAsimd("1111001x1x000xxxxxxx< context.Add(context.Add(op1, op2), op3), op.Opc != 1); + } + public static void Vpaddl(ArmEmitterContext context) { OpCode32Simd op = (OpCode32Simd)context.CurrOp; diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index 630e114c..8eef6b14 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -578,6 +578,22 @@ namespace ARMeilleure.Instructions } } + // VRINTR (floating-point). + public static void Vrintr_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintiS); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } + } + // VRINTZ (floating-point). 
public static void Vrint_Z(ArmEmitterContext context) { diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index c1c59b87..2f021a1a 100644 --- a/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -673,6 +673,35 @@ namespace ARMeilleure.Instructions context.Copy(GetVecA32(op.Qd), res); } + public static void EmitVectorPairwiseTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index * 2; + Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed); + Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed); + + if (op.Size == 2) + { + m1 = signed ? context.SignExtend32(OperandType.I64, m1) : context.ZeroExtend32(OperandType.I64, m1); + m2 = signed ? context.SignExtend32(OperandType.I64, m2) : context.ZeroExtend32(OperandType.I64, m2); + } + + Operand d1 = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed); + + res = EmitVectorInsert(context, res, emit(m1, m2, d1), op.Id + index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + // Narrow public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false) diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs index 6723a42e..457abbf4 100644 --- a/src/ARMeilleure/Instructions/InstName.cs +++ b/src/ARMeilleure/Instructions/InstName.cs @@ -637,6 +637,7 @@ namespace ARMeilleure.Instructions Vorn, Vorr, Vpadd, + Vpadal, Vpaddl, Vpmax, Vpmin, @@ -656,6 +657,7 @@ namespace ARMeilleure.Instructions Vrintm, Vrintn, Vrintp, + Vrintr, Vrintx, Vrshr, Vrshrn, -- cgit v1.2.3