diff options
| author | riperiperi <rhy3756547@hotmail.com> | 2020-06-24 01:43:44 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-06-24 10:43:44 +1000 |
| commit | 9a49f8aec92f7707037f5d1e677078451d07036b (patch) | |
| tree | 247380683eb145bf39ad3820e77c7f5f86af7f6c /ARMeilleure | |
| parent | 4472196b484d7814176cd2786e603584bb9fd169 (diff) | |
Fix VMVN (immediate), Add VPMIN, VPMAX, VMVN (register) (#1303)
* Add Vmvn (register), tests for both Vmvn variants.
* Add Vpmin, Vpmax, improve Non-FastFp accuracy for Vpadd
* Rebase on top of PTC.
* Add Nopcode
* Increment PTC version.
* Fix nits.
Diffstat (limited to 'ARMeilleure')
| -rw-r--r-- | ARMeilleure/Decoders/OpCodeTable.cs | 12 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs | 62 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstEmitSimdMove32.cs | 18 | ||||
| -rw-r--r-- | ARMeilleure/Instructions/InstName.cs | 2 | ||||
| -rw-r--r-- | ARMeilleure/Translation/PTC/Ptc.cs | 2 |
5 files changed, 90 insertions, 6 deletions
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs index ec7b8bd9..8567e1ce 100644 --- a/ARMeilleure/Decoders/OpCodeTable.cs +++ b/ARMeilleure/Decoders/OpCodeTable.cs @@ -704,6 +704,7 @@ namespace ARMeilleure.Decoders SetA32("<<<<0011111x0000xxxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluImm)); SetA32("<<<<0001111x0000xxxxxxxxxxx0xxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluRsImm)); SetA32("<<<<0001111x0000xxxxxxxx0xx1xxxx", InstName.Mvn, InstEmit32.Mvn, typeof(OpCode32AluRsReg)); + SetA32("<<<<0011001000001111000000000000", InstName.Nop, InstEmit32.Nop, typeof(OpCode32)); SetA32("<<<<0011100xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluImm)); SetA32("<<<<0001100xxxxxxxxxxxxxxxx0xxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluRsImm)); SetA32("<<<<0001100xxxxxxxxxxxxx0xx1xxxx", InstName.Orr, InstEmit32.Orr, typeof(OpCode32AluRsReg)); @@ -878,9 +879,10 @@ namespace ARMeilleure.Decoders SetA32("1111001x1x<<xxxxxxx01010x1x0xxxx", InstName.Vmull, InstEmit32.Vmull_1, typeof(OpCode32SimdRegElemLong)); SetA32("1111001x1x<<xxxxxxx01100x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, typeof(OpCode32SimdRegLong)); SetA32("111100101x00xxxxxxx01110x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, typeof(OpCode32SimdRegLong)); // Polynomial - SetA32("1111001x1x000xxxxxxx0xx00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); // D/Q vector I32. - SetA32("1111001x1x000xxxxxxx10x00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); - SetA32("1111001x1x000xxxxxxx110x0x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdImm)); + SetA32("111100111x110000xxxx01011xx0xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, typeof(OpCode32SimdBinary)); + SetA32("1111001x1x000xxxxxxx0xx00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, typeof(OpCode32SimdImm)); // D/Q vector I32. + SetA32("1111001x1x000xxxxxxx10x00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, typeof(OpCode32SimdImm)); + SetA32("1111001x1x000xxxxxxx110x0x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, typeof(OpCode32SimdImm)); SetA32("<<<<11101x110001xxxx101x01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, typeof(OpCode32SimdS)); SetA32("111100111x11xx01xxxx0x111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, typeof(OpCode32Simd)); SetA32("<<<<11100x01xxxxxxxx101xx1x0xxxx", InstName.Vnmla, InstEmit32.Vnmla_S, typeof(OpCode32SimdRegS)); @@ -890,6 +892,10 @@ namespace ARMeilleure.Decoders SetA32("1111001x1x000xxxxxxx0xx10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, typeof(OpCode32SimdImm)); SetA32("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, typeof(OpCode32SimdReg)); SetA32("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, typeof(OpCode32SimdReg)); + SetA32("1111001x0x<<xxxxxxxx1010x0x0xxxx", InstName.Vpmax, InstEmit32.Vpmax_I, typeof(OpCode32SimdReg)); + SetA32("111100110x00xxxxxxxx1111x0x0xxxx", InstName.Vpmax, InstEmit32.Vpmax_V, typeof(OpCode32SimdReg)); + SetA32("1111001x0x<<xxxxxxxx1010x0x1xxxx", InstName.Vpmin, InstEmit32.Vpmin_I, typeof(OpCode32SimdReg)); + SetA32("111100110x10xxxxxxxx1111x0x0xxxx", InstName.Vpmin, InstEmit32.Vpmin_V, typeof(OpCode32SimdReg)); SetA32("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, typeof(OpCode32SimdShImmNarrow)); SetA32("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, typeof(OpCode32SimdShImmNarrow)); SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte)); diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs index fdc1bb46..82f57d63 100644 --- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -817,7 +817,7 @@ namespace ARMeilleure.Instructions } else { - EmitVectorPairwiseOpF32(context, (op1, op2) => context.Add(op1, op2)); + EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2)); } } @@ -835,6 +835,66 @@ namespace ARMeilleure.Instructions } } + public static void Vpmax_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps); + } + else + { + EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat64.FPMaxFpscr), op1, op2)); + } + } + + public static void Vpmax_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp32(context, op.U ? X86PmaxuInstruction : X86PmaxsInstruction); + } + else + { + EmitVectorPairwiseOpI32(context, (op1, op2) => + { + Operand greater = op.U ? context.ICompareGreaterUI(op1, op2) : context.ICompareGreater(op1, op2); + return context.ConditionalSelect(greater, op1, op2); + }, !op.U); + } + } + + public static void Vpmin_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps); + } + else + { + EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2)); + } + } + + public static void Vpmin_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp32(context, op.U ? X86PminuInstruction : X86PminsInstruction); + } + else + { + EmitVectorPairwiseOpI32(context, (op1, op2) => + { + Operand greater = op.U ? context.ICompareLessUI(op1, op2) : context.ICompareLess(op1, op2); + return context.ConditionalSelect(greater, op1, op2); + }, !op.U); + } + } + public static void Vrev(ArmEmitterContext context) { OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp; diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs index f11f9cc5..b484381f 100644 --- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs +++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -34,7 +34,23 @@ namespace ARMeilleure.Instructions public static void Vmvn_I(ArmEmitterContext context) { - EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseExclusiveOr(op1, op1)); + if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (op1) => + { + Operand mask = X86GetAllElements(context, -1L); + return context.AddIntrinsic(Intrinsic.X86Pandn, op1, mask); + }); + } + else + { + EmitVectorUnaryOpZx32(context, (op1) => context.BitwiseNot(op1)); + } + } + + public static void Vmvn_II(ArmEmitterContext context) + { + EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseNot(op1)); } public static void Vmov_GS(ArmEmitterContext context) diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs index 9bf319aa..e4d08456 100644 --- a/ARMeilleure/Instructions/InstName.cs +++ b/ARMeilleure/Instructions/InstName.cs @@ -582,6 +582,8 @@ namespace ARMeilleure.Instructions Vnmls, Vorr, Vpadd, + Vpmax, + Vpmin, Vqrshrn, Vqrshrun, Vrev, diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 135a45f0..2b4059ec 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC { private const string HeaderMagic = "PTChd"; - private const int InternalVersion = 1; //! To be incremented manually for each change to the ARMeilleure project. + private const int InternalVersion = 2; //! To be incremented manually for each change to the ARMeilleure project. private const string BaseDir = "Ryujinx"; |
