diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2020-12-17 20:43:41 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-12-17 20:43:41 +0100 |
| commit | 8a33e884f8f482e93e2b90380b158c1417cc50f8 (patch) | |
| tree | 65eabad1c3a78d2a3bd7bf7992413fa78056178f /Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | |
| parent | b5c215111de665ef8d18b38405ac55e17996e30e (diff) | |
Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused inst.s). Fix Vfma_V slow path not using StandardFPSCRValue(). (#1775)
* Fix Vnmls_S fast path (F64: losing input d value). Fix Vnmla_S & Vnmls_S slow paths (using fused inst.s).
Add Vfma_S & Vfms_S Fma fast paths.
Add Vfnma_S inst. with Fma/Sse fast paths and slow path.
Add Vfnms_S Sse fast path.
Add Tests for affected inst.s.
Nits.
* InternalVersion = 1775
* Nits.
* Fix Vfma_V slow path not using StandardFPSCRValue().
* Nit: Fix Vfma_V order.
* Add Vfms_V Sse fast path and slow path.
* Add Vfma_V and Vfms_V Test.
Diffstat (limited to 'Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs')
| -rw-r--r-- | Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | 235 |
1 files changed, 142 insertions, 93 deletions
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs index 4298bd1f..e8298521 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs @@ -22,6 +22,59 @@ namespace Ryujinx.Tests.Cpu }; } + private static uint[] _Vfma_Vfms_Vfnma_Vfnms_S_F32_() + { + return new uint[] + { + 0xEEA00A00u, // VFMA. F32 S0, S0, S0 + 0xEEA00A40u, // VFMS. F32 S0, S0, S0 + 0xEE900A40u, // VFNMA.F32 S0, S0, S0 + 0xEE900A00u // VFNMS.F32 S0, S0, S0 + }; + } + + private static uint[] _Vfma_Vfms_Vfnma_Vfnms_S_F64_() + { + return new uint[] + { + 0xEEA00B00u, // VFMA. F64 D0, D0, D0 + 0xEEA00B40u, // VFMS. F64 D0, D0, D0 + 0xEE900B40u, // VFNMA.F64 D0, D0, D0 + 0xEE900B00u // VFNMS.F64 D0, D0, D0 + }; + } + + private static uint[] _Vfma_Vfms_V_F32_() + { + return new uint[] + { + 0xF2000C10u, // VFMA.F32 D0, D0, D0 + 0xF2200C10u // VFMS.F32 D0, D0, D0 + }; + } + + private static uint[] _Vmla_Vmls_Vnmla_Vnmls_S_F32_() + { + return new uint[] + { + 0xEE000A00u, // VMLA. F32 S0, S0, S0 + 0xEE000A40u, // VMLS. F32 S0, S0, S0 + 0xEE100A40u, // VNMLA.F32 S0, S0, S0 + 0xEE100A00u // VNMLS.F32 S0, S0, S0 + }; + } + + private static uint[] _Vmla_Vmls_Vnmla_Vnmls_S_F64_() + { + return new uint[] + { + 0xEE000B00u, // VMLA. F64 D0, D0, D0 + 0xEE000B40u, // VMLS. F64 D0, D0, D0 + 0xEE100B40u, // VNMLA.F64 D0, D0, D0 + 0xEE100B00u // VNMLS.F64 D0, D0, D0 + }; + } + private static uint[] _Vp_Add_Max_Min_F_() { return new uint[] @@ -184,8 +237,8 @@ namespace Ryujinx.Tests.Cpu private const int RndCnt = 2; private static readonly bool NoZeros = false; - private static readonly bool NoInfs = true; - private static readonly bool NoNaNs = true; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; [Explicit] [Test, Pairwise, Description("VADD.f32 V0, V0, V0")] @@ -293,119 +346,115 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv); } - [Test, Pairwise, Description("VFMA.F<size> <Vd>, <Vn>, <Vm>")] - public void Vfma([Values(0u, 1u)] uint rd, - [Values(0u, 1u)] uint rn, - [Values(0u, 1u)] uint rm, - [Values(0u, 1u)] uint Q, - [ValueSource("_2S_F_")] ulong z, - [ValueSource("_2S_F_")] ulong a, - [ValueSource("_2S_F_")] ulong b ) + [Test, Pairwise] [Explicit] // Fused. + public void Vfma_Vfms_Vfnma_Vfnms_S_F32([ValueSource(nameof(_Vfma_Vfms_Vfnma_Vfnms_S_F32_))] uint opcode, + [Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rn, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_1S_F_))] ulong s0, + [ValueSource(nameof(_1S_F_))] ulong s1, + [ValueSource(nameof(_1S_F_))] ulong s2, + [ValueSource(nameof(_1S_F_))] ulong s3) { - uint opcode = 0xf2000c10; - - V128 v0; - V128 v1; - V128 v2; - - uint c = (uint) BitConverter.SingleToInt32Bits(z); - uint d = (uint) BitConverter.SingleToInt32Bits(a); - uint e = (uint) BitConverter.SingleToInt32Bits(b); - if (Q == 0) - { - opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); - opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15); - - v0 = MakeVectorE0E1(c, c); - v1 = MakeVectorE0E1(d, c); - v2 = MakeVectorE0E1(e, c); - } - else - { - rd = rn = rm = 0; // Needed, as these values cannot be odd values if Q == 1. - opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); - opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); - opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); + opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); + opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) << 15); + opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - v0 = MakeVectorE0E1E2E3(c, c, d, e); - v1 = MakeVectorE0E1E2E3(d, c, e, c); - v2 = MakeVectorE0E1E2E3(e, c, d, c); - } + V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3); - opcode |= ((Q & 1) << 6); + SingleOpcode(opcode, v0: v0); - SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); CompareAgainstUnicorn(); } - - [Test, Pairwise, Description("VFNMA.F<size> <Vd>, <Vn>, <Vm>")] - public void Vfnma([Values(0u, 1u)] uint rd, - [Values(0u, 1u)] uint rn, - [Values(0u, 1u)] uint rm, - [Values(2u, 3u)] uint size, - [ValueSource("_2S_F_")] ulong z, - [ValueSource("_2S_F_")] ulong a, - [ValueSource("_2S_F_")] ulong b) + + [Test, Pairwise] [Explicit] // Fused. + public void Vfma_Vfms_Vfnma_Vfnms_S_F64([ValueSource(nameof(_Vfma_Vfms_Vfnma_Vfnms_S_F64_))] uint opcode, + [Values(0u, 1u)] uint rd, + [Values(0u, 1u)] uint rn, + [Values(0u, 1u)] uint rm, + [ValueSource(nameof(_1D_F_))] ulong d0, + [ValueSource(nameof(_1D_F_))] ulong d1) { - uint opcode = 0xe900840; + opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); + opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); + opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); - if (size == 2) - { - opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); - opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15); + V128 v0 = MakeVectorE0E1(d0, d1); - } - else + SingleOpcode(opcode, v0: v0); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] // Fused. + public void Vfma_Vfms_V_F32([ValueSource(nameof(_Vfma_Vfms_V_F32_))] uint opcode, + [Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rn, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_2S_F_))] ulong d0, + [ValueSource(nameof(_2S_F_))] ulong d1, + [ValueSource(nameof(_2S_F_))] ulong d2, + [ValueSource(nameof(_2S_F_))] ulong d3, + [Values] bool q) + { + if (q) { - opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); - opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); - opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); + opcode |= 1 << 6; + + rd >>= 1; rd <<= 1; + rn >>= 1; rn <<= 1; + rm >>= 1; rm <<= 1; } - opcode |= ((size & 3) << 8); + opcode |= ((rd & 0xf) << 12) | ((rd & 0x10) << 18); + opcode |= ((rn & 0xf) << 16) | ((rn & 0x10) << 3); + opcode |= ((rm & 0xf) << 0) | ((rm & 0x10) << 1); - V128 v0 = MakeVectorE0E1(z, z); - V128 v1 = MakeVectorE0E1(a, z); - V128 v2 = MakeVectorE0E1(b, z); + V128 v0 = MakeVectorE0E1(d0, d1); + V128 v1 = MakeVectorE0E1(d2, d3); + + SingleOpcode(opcode, v0: v0, v1: v1); - SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); CompareAgainstUnicorn(); } - [Test, Pairwise, Description("VFNMS.F<size> <Vd>, <Vn>, <Vm>")] - public void Vfnms([Values(0u, 1u)] uint rd, - [Values(0u, 1u)] uint rn, - [Values(0u, 1u)] uint rm, - [Values(2u, 3u)] uint size, - [ValueSource("_2S_F_")] ulong z, - [ValueSource("_2S_F_")] ulong a, - [ValueSource("_2S_F_")] ulong b) + [Test, Pairwise] [Explicit] + public void Vmla_Vmls_Vnmla_Vnmls_S_F32([ValueSource(nameof(_Vmla_Vmls_Vnmla_Vnmls_S_F32_))] uint opcode, + [Values(0u, 1u, 2u, 3u)] uint rd, + [Values(0u, 1u, 2u, 3u)] uint rn, + [Values(0u, 1u, 2u, 3u)] uint rm, + [ValueSource(nameof(_1S_F_))] ulong s0, + [ValueSource(nameof(_1S_F_))] ulong s1, + [ValueSource(nameof(_1S_F_))] ulong s2, + [ValueSource(nameof(_1S_F_))] ulong s3) { - uint opcode = 0xee900a00; + opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); + opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) << 15); + opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - if (size == 2) - { - opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1); - opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11); - opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) >> 15); - - } - else - { - opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); - opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); - opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); - } + V128 v0 = MakeVectorE0E1E2E3((uint)s0, (uint)s1, (uint)s2, (uint)s3); - opcode |= ((size & 3) << 8); + SingleOpcode(opcode, v0: v0); - V128 v0 = MakeVectorE0E1(z, z); - V128 v1 = MakeVectorE0E1(a, z); - V128 v2 = MakeVectorE0E1(b, z); + CompareAgainstUnicorn(); + } + + [Test, Pairwise] [Explicit] + public void Vmla_Vmls_Vnmla_Vnmls_S_F64([ValueSource(nameof(_Vmla_Vmls_Vnmla_Vnmls_S_F64_))] uint opcode, + [Values(0u, 1u)] uint rd, + [Values(0u, 1u)] uint rn, + [Values(0u, 1u)] uint rm, + [ValueSource(nameof(_1D_F_))] ulong d0, + [ValueSource(nameof(_1D_F_))] ulong d1) + { + opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12); + opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16); + opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0); + + V128 v0 = MakeVectorE0E1(d0, d1); + + SingleOpcode(opcode, v0: v0); - SingleOpcode(opcode, v0: v0, v1: v1, v2: v2); CompareAgainstUnicorn(); } |
