diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2020-12-17 20:43:41 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-12-17 20:43:41 +0100 |
| commit | 8a33e884f8f482e93e2b90380b158c1417cc50f8 (patch) | |
| tree | 65eabad1c3a78d2a3bd7bf7992413fa78056178f /Ryujinx.Tests | |
| parent | b5c215111de665ef8d18b38405ac55e17996e30e (diff) | |
Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused instructions). Fix Vfma_V slow path not using StandardFPSCRValue(). (#1775)
* Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused instructions).
Add Vfma_S & Vfms_S Fma fast paths.
Add Vfnma_S instruction with Fma/Sse fast paths and slow path.
Add Vfnms_S Sse fast path.
Add tests for the affected instructions.
Nits.
* InternalVersion = 1775
* Nits.
* Fix Vfma_V slow path not using StandardFPSCRValue().
* Nit: Fix Vfma_V order.
* Add Vfms_V Sse fast path and slow path.
* Add Vfma_V and Vfms_V Test.
Diffstat (limited to 'Ryujinx.Tests')
| -rw-r--r-- | Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs | 59 | ||||
| -rw-r--r-- | Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | 235 |
2 files changed, 175 insertions, 119 deletions
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs index 565d231a..395f2464 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdCvt32.cs @@ -22,41 +22,45 @@ namespace Ryujinx.Tests.Cpu 0x80000000u, 0xFFFFFFFFu }; } - private static IEnumerable<uint> _1S_F_() + private static IEnumerable<ulong> _1S_F_() { - yield return 0xFF7FFFFFu; // -Max Normal (float.MinValue) - yield return 0x80800000u; // -Min Normal - yield return 0x807FFFFFu; // -Max Subnormal - yield return 0x80000001u; // -Min Subnormal (-float.Epsilon) - yield return 0x7F7FFFFFu; // +Max Normal (float.MaxValue) - yield return 0x00800000u; // +Min Normal - yield return 0x007FFFFFu; // +Max Subnormal - yield return 0x00000001u; // +Min Subnormal (float.Epsilon) + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) if (!NoZeros) { - yield return 0x80000000u; // -Zero - yield return 0x00000000u; // +Zero + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero } if (!NoInfs) { - yield return 0xFF800000u; // -Infinity - yield return 0x7F800000u; // +Infinity + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity } if (!NoNaNs) { - yield return 0xFFC00000u; // -QNaN (all zeros payload) (float.NaN) - yield return 0xFFBFFFFFu; // -SNaN (all ones payload) - yield return 0x7FC00000u; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) - yield return 0x7FBFFFFFu; // +SNaN (all ones payload) + yield return 0x00000000FFC00000ul; // 
-QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) } for (int cnt = 1; cnt <= RndCnt; cnt++) { - yield return GenNormalS(); - yield return GenSubnormalS(); + ulong grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong rnd1 = GenNormalS(); + ulong rnd2 = GenSubnormalS(); + + yield return (grbg << 32) | rnd1; + yield return (grbg << 32) | rnd2; } } @@ -93,8 +97,11 @@ namespace Ryujinx.Tests.Cpu for (int cnt = 1; cnt <= RndCnt; cnt++) { - yield return GenNormalD(); - yield return GenSubnormalD(); + ulong rnd1 = GenNormalD(); + ulong rnd2 = GenSubnormalD(); + + yield return rnd1; + yield return rnd2; } } #endregion @@ -109,10 +116,10 @@ namespace Ryujinx.Tests.Cpu [Test, Pairwise, Description("VCVT.<dt>.F32 <Sd>, <Sm>")] public void Vcvt_F32_I32([Values(0u, 1u, 2u, 3u)] uint rd, [Values(0u, 1u, 2u, 3u)] uint rm, - [ValueSource(nameof(_1S_F_))] uint s0, - [ValueSource(nameof(_1S_F_))] uint s1, - [ValueSource(nameof(_1S_F_))] uint s2, - [ValueSource(nameof(_1S_F_))] uint s3, + [ValueSource(nameof(_1S_F_))] ulong s0, + [ValueSource(nameof(_1S_F_))] ulong s1, + [ValueSource(nameof(_1S_F_))] ulong s2, + [ValueSource(nameof(_1S_F_))] ulong s3, [Values] bool unsigned) // <U32, S32> { uint opcode = 0xeebc0ac0u; // VCVT.U32.F32 S0, S0 @@ -125,7 +132,7 @@ namespace Ryujinx.Tests.Cpu opcode |= ((rd & 0x1e) << 11) | ((rd & 0x1) << 22); opcode |= ((rm & 0x1e) >> 1) | ((rm & 0x1) << 5); - V128 v0 = MakeVectorE0E1E2E3(s0, s1, s2, s3); + V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3); SingleOpcode(opcode, v0: v0); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs index 4298bd1f..e8298521 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -22,6 +22,59 @@ namespace 
Ryujinx.Tests.Cpu }; } + private static uint[] _Vfma_Vfms_Vfnma_Vfnms_S_F32_() + { + return new uint[] + { + 0xEEA00A00u, // VFMA. F32 S0, S0, S0 + 0xEEA00A40u, // VFMS. F32 S0, S0, S0 + 0xEE900A40u, // VFNMA.F32 S0, S0, S0 + 0xEE900A00u // VFNMS.F32 S0, S0, S0 + }; + } + + private static uint[] _Vfma_Vfms_Vfnma_Vfnms_S_F64_() + { + return new uint[] + { + 0xEEA00B00u, // VFMA. F64 D0, D0, D0 + 0xEEA00B40u, // VFMS. F64 D0, D0, D0 + 0xEE900B40u, // VFNMA.F64 D0, D0, D0 + 0xEE900B00u // VFNMS.F64 D0, D0, D0 + }; + } + + private static uint[] _Vfma_Vfms_V_F32_() + { + return new uint[] + { + 0xF2000C10u, // VFMA.F32 D0, D0, D0 + 0xF2200C10u // VFMS.F32 D0, D0, D0 + }; + } + + private static uint[] _Vmla_Vmls_Vnmla_Vnmls_S_F32_() + { + return new uint[] + { + 0xEE000A00u, // VMLA. F32 S0, S0, S0 + 0xEE000A40u, // VMLS. F32 S0, S0, S0 + 0xEE100A40u, // VNMLA.F32 S0, S0, S0 + 0xEE100A00u // VNMLS.F32 S0, S0, S0 + }; + } + + private static uint[] _Vmla_Vmls_Vnmla_Vnmls_S_F64_() + { + return new uint[] + { + 0xEE000B00u, // VMLA. F64 D0, D0, D0 + 0xEE000B40u, // VMLS. 
F64 D0, D0, D0 + 0xEE100B40u, // VNMLA.F64 D0, D0, D0 + 0xEE100B00u // VNMLS.F64 D0, D0, D0 + }; + } + private static uint[] _Vp_Add_Max_Min_F_() { return new uint[] @@ -184,8 +237,8 @@ namespace Ryujinx.Tests.Cpu private const int RndCnt = 2; private static readonly bool NoZeros = false; - private static readonly bool NoInfs = true; - private static readonly bool NoNaNs = true; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; [Explicit] [Test, Pairwise, Description("VADD.f32 V0, V0, V0")] @@ -293,119 +346,115 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv); } - [Test, Pairwise, Description("VFMA.F<size> <Vd>, <Vn>, <Vm>")] - public void Vfma([Values(0u, 1u)] uint rd, - [Values(0u, 1u)] uint rn, - [Values(0u, 1u)] uint rm, - [Values(0u, 1u)] uint Q, - [ValueSource("_2S_F_")] ulong z, - [ValueSource("_2S_F_")] ulong a, - [ValueSource("_2S_F_")] ulong b ) + [Test, Pairwise] [Explicit] // Fused. + public void Vfma_Vfms_Vfnma_Vfnms_S_F32([ValueSource(nameof(_Vfma_Vfms_Vfnma_Vfnms_S_F32_))] uint opcode, + [Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rn, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_1S_F_))] ulong s0, + [ValueSource(nameof(_1S_F_))] ulong s1, + [ValueSource(nameof(_1S_F_))] ulong s2, + [ValueSource(nameof(_1S_F_))] ulong s3) { - uint opcode = 0xf2000c10; - - V128 v0; - V128 v1; - V128 v2; - - uint c = (uint) BitConverter.SingleToInt32Bits(z); - uint d = (uint) BitConverter.SingleToInt32Bits(a); - uint e = (uint) BitConverter.SingleToInt32Bits(b); - if (Q == 0) - { - opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); - opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15); - - v0 = MakeVectorE0E1(c, c); - v1 = MakeVectorE0E1(d, c); - v2 = MakeVectorE0E1(e, c); - } - else - { - rd = rn = rm = 0; // Needed, as these values cannot be odd values if Q == 1. 
- opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); - opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); - opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); + opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); + opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) << 15); + opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - v0 = MakeVectorE0E1E2E3(c, c, d, e); - v1 = MakeVectorE0E1E2E3(d, c, e, c); - v2 = MakeVectorE0E1E2E3(e, c, d, c); - } + V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3); - opcode |= ((Q & 1) << 6); + SingleOpcode(opcode, v0: v0); - SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); CompareAgainstUnicorn(); } - - [Test, Pairwise, Description("VFNMA.F<size> <Vd>, <Vn>, <Vm>")] - public void Vfnma([Values(0u, 1u)] uint rd, - [Values(0u, 1u)] uint rn, - [Values(0u, 1u)] uint rm, - [Values(2u, 3u)] uint size, - [ValueSource("_2S_F_")] ulong z, - [ValueSource("_2S_F_")] ulong a, - [ValueSource("_2S_F_")] ulong b) + + [Test, Pairwise] [Explicit] // Fused. + public void Vfma_Vfms_Vfnma_Vfnms_S_F64([ValueSource(nameof(_Vfma_Vfms_Vfnma_Vfnms_S_F64_))] uint opcode, + [Values(0u, 1u)] uint rd, + [Values(0u, 1u)] uint rn, + [Values(0u, 1u)] uint rm, + [ValueSource(nameof(_1D_F_))] ulong d0, + [ValueSource(nameof(_1D_F_))] ulong d1) { - uint opcode = 0xe900840; + opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); + opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); + opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); - if (size == 2) - { - opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); - opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15); + V128 v0 = MakeVectorE0E1(d0, d1); - } - else + SingleOpcode(opcode, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] // Fused. 
+ public void Vfma_Vfms_V_F32([ValueSource(nameof(_Vfma_Vfms_V_F32_))] uint opcode, + [Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rn, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_2S_F_))] ulong d0, + [ValueSource(nameof(_2S_F_))] ulong d1, + [ValueSource(nameof(_2S_F_))] ulong d2, + [ValueSource(nameof(_2S_F_))] ulong d3, + [Values] bool q) + { + if (q) { - opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); - opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); - opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); + opcode |= 1 << 6; + + rd >>= 1; rd <<= 1; + rn >>= 1; rn <<= 1; + rm >>= 1; rm <<= 1; } - opcode |= ((size & 3) << 8); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); - V128 v0 = MakeVectorE0E1(z, z); - V128 v1 = MakeVectorE0E1(a, z); - V128 v2 = MakeVectorE0E1(b, z); + V128 v0 = MakeVectorE0E1(d0, d1); + V128 v1 = MakeVectorE0E1(d2, d3); + + SingleOpcode(opcode, v0: v0, v1: v1); - SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); CompareAgainstUnicorn(); } - [Test, Pairwise, Description("VFNMS.F<size> <Vd>, <Vn>, <Vm>")] - public void Vfnms([Values(0u, 1u)] uint rd, - [Values(0u, 1u)] uint rn, - [Values(0u, 1u)] uint rm, - [Values(2u, 3u)] uint size, - [ValueSource("_2S_F_")] ulong z, - [ValueSource("_2S_F_")] ulong a, - [ValueSource("_2S_F_")] ulong b) + [Test, Pairwise] [Explicit] + public void Vmla_Vmls_Vnmla_Vnmls_S_F32([ValueSource(nameof(_Vmla_Vmls_Vnmla_Vnmls_S_F32_))] uint opcode, + [Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rn, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_1S_F_))] ulong s0, + [ValueSource(nameof(_1S_F_))] ulong s1, + [ValueSource(nameof(_1S_F_))] ulong s2, + [ValueSource(nameof(_1S_F_))] ulong s3) { - uint opcode = 0xee900a00; + opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); + opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) << 15); + opcode |= (((rm & 
0x1) << 5) | (rm & 0x1e) >> 1); - if (size == 2) - { - opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); - opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15); - - } - else - { - opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); - opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); - opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); - } + V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3); - opcode |= ((size & 3) << 8); + SingleOpcode(opcode, v0: v0); - V128 v0 = MakeVectorE0E1(z, z); - V128 v1 = MakeVectorE0E1(a, z); - V128 v2 = MakeVectorE0E1(b, z); + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void Vmla_Vmls_Vnmla_Vnmls_S_F64([ValueSource(nameof(_Vmla_Vmls_Vnmla_Vnmls_S_F64_))] uint opcode, + [Values(0u, 1u)] uint rd, + [Values(0u, 1u)] uint rn, + [Values(0u, 1u)] uint rm, + [ValueSource(nameof(_1D_F_))] ulong d0, + [ValueSource(nameof(_1D_F_))] ulong d1) + { + opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); + opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); + opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); + + V128 v0 = MakeVectorE0E1(d0, d1); + + SingleOpcode(opcode, v0: v0); - SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); CompareAgainstUnicorn(); } |
