diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2020-08-13 07:34:02 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-08-13 02:34:02 -0300 |
| commit | 6938988427e7f96adcd8fe76fe5d0a19b014b2b2 (patch) | |
| tree | 39b049344e5d00b5f152b5354e7c8090ebf46c41 /ARMeilleure | |
| parent | 1ad9045c6b00a5c729c8c7d697f3da54ed177883 (diff) | |
Fix Vcvt_FI & Vcvt_RM; Add Vfma_S & Vfms_S. Add Tests. (#1471)
* Fix Vcvt_FI & Vcvt_RM; Add Vfma_S & Vfms_S. Add Tests.
* Address PR feedback & Nit.
Diffstat (limited to 'ARMeilleure')
| -rw-r--r-- | ARMeilleure/Decoders/OpCode32SimdCvtFI.cs | 14 | ||||
| -rw-r--r-- | ARMeilleure/Decoders/OpCode32SimdS.cs | 9 | ||||
| -rw-r--r-- | ARMeilleure/Decoders/OpCodeTable.cs | 12 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs | 38 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdCvt32.cs | 16 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdHelper32.cs | 2 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdMove32.cs | 2 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstName.cs | 2 | ||||
| -rw-r--r-- | ARMeilleure/Translation/PTC/Ptc.cs | 2 |
9 files changed, 75 insertions, 22 deletions
diff --git a/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs index aaedcb3c..b654a192 100644 --- a/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs +++ b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs @@ -2,12 +2,20 @@ { class OpCode32SimdCvtFI : OpCode32SimdS { - public int Opc2 { get; private set; } - public OpCode32SimdCvtFI(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { - Opc2 = (opCode >> 16) & 0x7; Opc = (opCode >> 7) & 0x1; + + bool toInteger = (Opc2 & 0b100) != 0; + + if (toInteger) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + } } } } diff --git a/ARMeilleure/Decoders/OpCode32SimdS.cs b/ARMeilleure/Decoders/OpCode32SimdS.cs index 2e860d9c..766cf4ba 100644 --- a/ARMeilleure/Decoders/OpCode32SimdS.cs +++ b/ARMeilleure/Decoders/OpCode32SimdS.cs @@ -2,14 +2,17 @@ { class OpCode32SimdS : OpCode32, IOpCode32Simd { - public int Vd { get; private set; } - public int Vm { get; private set; } - public int Opc { get; protected set; } + public int Vd { get; protected set; } + public int Vm { get; protected set; } + public int Opc { get; protected set; } // "with_zero" (Opc<1>) [Vcmp, Vcmpe]. + public int Opc2 { get; private set; } // opc2 or RM (opc2<1:0>) [Vcvt, Vrint]. public int Size { get; protected set; } public OpCode32SimdS(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { Opc = (opCode >> 15) & 0x3; + Opc2 = (opCode >> 16) & 0x7; + Size = (opCode >> 8) & 0x3; bool single = Size != 3; diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index bbcc15ba..4daccfdb 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -825,15 +825,17 @@ namespace ARMeilleure.Decoders SetA32("<<<<11101x11010xxxxx101x01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, typeof(OpCode32SimdS)); SetA32("<<<<11101x11010xxxxx101x11x0xxxx", InstName.Vcmpe, InstEmit32.Vcmpe, typeof(OpCode32SimdS)); SetA32("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, typeof(OpCode32SimdS)); // FP 32 and 64, scalar. - SetA32("<<<<11101x11110xxxxx10xx11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // FP32 to int. - SetA32("<<<<11101x111000xxxx10xxx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // Int to FP32. - SetA32("111111101x1111xxxxxx10>>x1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_R, typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp). + SetA32("<<<<11101x11110xxxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // FP32 to int. + SetA32("<<<<11101x111000xxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // Int to FP32. + SetA32("111111101x1111xxxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_RM, typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp). SetA32("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, typeof(OpCode32SimdCmpZ)); // FP and integer, vector. SetA32("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS)); SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, typeof(OpCode32SimdDupGP)); SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, typeof(OpCode32SimdDupElem)); SetA32("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor, InstEmit32.Veor_I, typeof(OpCode32SimdBinary)); SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, typeof(OpCode32SimdExt)); + SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, typeof(OpCode32SimdRegS)); + SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, typeof(OpCode32SimdRegS)); SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle)); SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 1. SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 2. @@ -918,8 +920,8 @@ namespace ARMeilleure.Decoders SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte)); SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg)); SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, typeof(OpCode32SimdRev)); - SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, typeof(OpCode32SimdCvtFI)); - SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, typeof(OpCode32SimdCvtFI)); + SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, typeof(OpCode32SimdS)); + SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, typeof(OpCode32SimdS)); SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, typeof(OpCode32SimdShImm)); SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte)); SetA32("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg)); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index f7f3d47e..57176794 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -231,6 +231,38 @@ namespace ARMeilleure.Instructions } } + public static void Vfma_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + // TODO: Use FMA instruction set. + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + + public static void Vfms_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + // TODO: Use FMA instruction set. + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + }); + } + } + public static void Vmov_S(ArmEmitterContext context) { if (Optimizations.FastFP && Optimizations.UseSse2) @@ -586,7 +618,8 @@ namespace ARMeilleure.Instructions { EmitScalarTernaryOpF32(context, (op1, op2, op3) => { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3); + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res); }); } } @@ -657,7 +690,8 @@ namespace ARMeilleure.Instructions { EmitScalarTernaryOpF32(context, (op1, op2, op3) => { - return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3); + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res); }); } } diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs index 00b8ffd6..e4efea70 100644 --- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -139,6 +139,7 @@ namespace ARMeilleure.Instructions } } + // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point). public static void Vcvt_FI(ArmEmitterContext context) { OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; @@ -236,13 +237,14 @@ namespace ARMeilleure.Instructions return roundMode; } - public static void Vcvt_R(ArmEmitterContext context) + // VCVTA/M/N/P (floating-point). + public static void Vcvt_RM(ArmEmitterContext context) { - OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1). OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; - bool unsigned = (op.Opc & 1) == 0; + bool unsigned = op.Opc == 0; int rm = op.Opc2 & 3; if (Optimizations.UseSse41 && rm != 0b00) @@ -277,9 +279,10 @@ namespace ARMeilleure.Instructions } } + // VRINTA/M/N/P (floating-point). public static void Vrint_RM(ArmEmitterContext context) { - OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; @@ -320,9 +323,10 @@ namespace ARMeilleure.Instructions } } + // VRINTZ (floating-point). public static void Vrint_Z(ArmEmitterContext context) { - IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; if (Optimizations.UseSse2) { @@ -355,7 +359,7 @@ namespace ARMeilleure.Instructions private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed) { // A port of the similar round function in InstEmitSimdCvt. - OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; bool doubleSize = (op.Size & 1) != 0; int shift = doubleSize ? 1 : 2; diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs index e045c601..a962c0fc 100644 --- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -906,7 +906,7 @@ namespace ARMeilleure.Instructions OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; bool doubleSize = (op.Size & 1) != 0; - int shift = doubleSize ? 1 : 2; + Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1; Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2; diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs index b484381f..52292242 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -559,7 +559,7 @@ namespace ARMeilleure.Instructions } } - public static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc) + private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc) { OpCode32Simd op = (OpCode32Simd)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index d7283029..9e820f6b 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -563,6 +563,8 @@ namespace ARMeilleure.Instructions Vdup, Veor, Vext, + Vfma, + Vfms, Vld1, Vld2, Vld3, diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 2ff98f85..ccb3f705 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 20; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 1471; //! To be incremented manually for each change to the ARMeilleure project. private const string BaseDir = "Ryujinx"; |
