aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ARMeilleure/CodeGen/X86/Assembler.cs | 8
-rw-r--r-- ARMeilleure/Decoders/OpCodeTable.cs | 1
-rw-r--r-- ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs | 17
-rw-r--r-- ARMeilleure/Instructions/InstEmitSimdHelper32.cs | 12
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs | 46
5 files changed, 80 insertions, 4 deletions
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
index b242a171..7f19c3c4 100644
--- a/ARMeilleure/CodeGen/X86/Assembler.cs
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -273,10 +273,10 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
- Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
- Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
- Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
- Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Vfmsub231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38ba, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index 3d3b26f9..5cf83476 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -820,6 +820,7 @@ namespace ARMeilleure.Decoders
SetA32("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor, InstEmit32.Veor_I, OpCode32SimdBinary.Create);
SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, OpCode32SimdExt.Create);
SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create);
+ SetA32("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create);
SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create);
SetA32("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create);
SetA32("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create);
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index d72df97c..40289520 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -252,6 +252,23 @@ namespace ARMeilleure.Instructions
}
}
+ // Emits the vector form of VFMA: uses the x86 FMA intrinsic when both
+ // Optimizations.FastFP and Optimizations.UseFma are set, otherwise emits a
+ // call to SoftFloat32.FPMulAdd.
+ public static void Vfma_V(ArmEmitterContext context) // Fused.
+ {
+     bool useFmaIntrinsic = Optimizations.FastFP && Optimizations.UseFma;
+
+     if (!useFmaIntrinsic)
+     {
+         // Fallback path: hand the operation to the lambda overload, which emits a soft-float call.
+         EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+         {
+             return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+         });
+
+         return;
+     }
+
+     // Vector elements are always 32 bits long here; only the number of elements in a vector changes.
+     // The 64-bit intrinsic is passed to satisfy the helper's signature but will never be used.
+     EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps, Intrinsic.X86Vfmadd231pd);
+ }
+
public static void Vfma_S(ArmEmitterContext context) // Fused.
{
if (Optimizations.FastFP && Optimizations.UseSse2)
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index 75aa7220..2d5d4ba9 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -820,6 +820,18 @@ namespace ARMeilleure.Instructions
});
}
+ // Emits a three-operand vector intrinsic for the current opcode.
+ // inst32 is used when bit 0 of the opcode's Size is clear, inst64 when it is set.
+ public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ {
+     OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+     Intrinsic selected;
+
+     if ((op.Size & 1) == 0)
+     {
+         selected = inst32;
+     }
+     else
+     {
+         selected = inst64;
+     }
+
+     // The operands are forwarded to the intrinsic in the order the helper supplies them.
+     EmitVectorTernaryOpSimd32(context, (vd, vn, vm) =>
+     {
+         return context.AddIntrinsic(selected, vd, vn, vm);
+     });
+ }
+
public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc)
{
OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
index dc5903d5..4298bd1f 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg32.cs
@@ -293,6 +293,52 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(fpsrMask: Fpsr.Nzcv);
}
+ // Builds a VFMA (vector) A32 opcode from the register indices and the Q bit, seeds
+ // v0..v2 with the test values, executes the single opcode and compares the resulting
+ // state against Unicorn.
+ [Test, Pairwise, Description("VFMA.F<size> <Vd>, <Vn>, <Vm>")]
+ public void Vfma([Values(0u, 1u)] uint rd,
+                  [Values(0u, 1u)] uint rn,
+                  [Values(0u, 1u)] uint rm,
+                  [Values(0u, 1u)] uint Q,
+                  [ValueSource("_2S_F_")] ulong z,
+                  [ValueSource("_2S_F_")] ulong a,
+                  [ValueSource("_2S_F_")] ulong b)
+ {
+     uint opcode = 0xf2000c10;
+
+     V128 v0;
+     V128 v1;
+     V128 v2;
+
+     // NOTE(review): z/a/b are ulong and are implicitly converted to float *by value*
+     // before their bits are taken; confirm this is intended rather than a bit reinterpretation.
+     uint c = (uint) BitConverter.SingleToInt32Bits(z);
+     uint d = (uint) BitConverter.SingleToInt32Bits(a);
+     uint e = (uint) BitConverter.SingleToInt32Bits(b);
+
+     if (Q == 0)
+     {
+         // Low bit of each index goes to the M/D/N bit (5/22/7); the remaining bits go to
+         // the 4-bit register field (Vm at bit 0, Vd at bit 12, Vn at bit 16).
+         // NOTE(review): the Q == 1 branch uses a different index layout (bit 4 -> M/D/N);
+         // confirm which register numbering this branch is meant to follow.
+         opcode |= (((rm & 0x1) << 5) | (rm & 0x1e) >> 1);
+         opcode |= (((rd & 0x1) << 22) | (rd & 0x1e) << 11);
+         // Fixed: the upper bits were shifted right by 15 (always zero) instead of left into bits 19:16.
+         opcode |= (((rn & 0x1) << 7) | (rn & 0x1e) << 15);
+
+         v0 = MakeVectorE0E1(c, c);
+         v1 = MakeVectorE0E1(d, c);
+         v2 = MakeVectorE0E1(e, c);
+     }
+     else
+     {
+         rd = rn = rm = 0; // Needed, as these values cannot be odd values if Q == 1.
+         // With all indices forced to zero, the three lines below currently OR in nothing.
+         opcode |= (((rm & 0x10) << 1) | (rm & 0xf) << 0);
+         opcode |= (((rd & 0x10) << 18) | (rd & 0xf) << 12);
+         opcode |= (((rn & 0x10) << 3) | (rn & 0xf) << 16);
+
+         v0 = MakeVectorE0E1E2E3(c, c, d, e);
+         v1 = MakeVectorE0E1E2E3(d, c, e, c);
+         v2 = MakeVectorE0E1E2E3(e, c, d, c);
+     }
+
+     // Q is encoded at bit 6 of the opcode.
+     opcode |= ((Q & 1) << 6);
+
+     SingleOpcode(opcode, v0: v0, v1: v1, v2: v2);
+     CompareAgainstUnicorn();
+ }
+
[Test, Pairwise, Description("VFNMA.F<size> <Vd>, <Vn>, <Vm>")]
public void Vfnma([Values(0u, 1u)] uint rd,
[Values(0u, 1u)] uint rn,