about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> 2018-10-14 04:35:16 +0200
committer gdkchan <gab.dark.100@gmail.com> 2018-10-13 23:35:16 -0300
commit 894459fcd7797b1e38f2448797d83856d11b6e23 (patch)
tree 87a67e3b80cba4b05a29d243db63d130e1b362c2
parent ac1a379265d0c02a8bd4a146c205f21e2d00f3ab (diff)
Add Fmls_Se, Fmulx_Se/Ve, Smov_S Inst.; Opt. Clz/Clz_V, Cnt_V, Shl_V, S/Ushr_V, S/Usra_V Inst.; Add 11 Tests. Some fixes. (#449)
* Update AOpCodeTable.cs
* Update AInstEmitSimdMove.cs
* Update AInstEmitSimdArithmetic.cs
* Update AInstEmitSimdShift.cs
* Update ASoftFallback.cs
* Update ASoftFloat.cs
* Update AOpCodeSimdRegElemF.cs
* Update CpuTestSimdIns.cs
* Update CpuTestSimdRegElem.cs
* Create CpuTestSimdRegElemF.cs
* Update CpuTestSimd.cs
* Update CpuTestSimdReg.cs
* Superseded Fmul_Se Test. Nit.
* Address PR feedback.
* Address PR feedback.
* Update AInstEmitSimdArithmetic.cs
* Update ASoftFallback.cs
* Update AInstEmitAlu.cs
* Update AInstEmitSimdShift.cs
-rw-r--r-- ChocolArm64/AOpCodeTable.cs 14
-rw-r--r-- ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs 27
-rw-r--r-- ChocolArm64/Instruction/AInstEmitAlu.cs 14
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs 81
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdMove.cs 14
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdShift.cs 131
-rw-r--r-- ChocolArm64/Instruction/ASoftFallback.cs 19
-rw-r--r-- ChocolArm64/Instruction/ASoftFloat.cs 16
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimd.cs 58
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs 36
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimdIns.cs 109
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs 68
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs 137
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs 424
14 files changed, 929 insertions, 219 deletions
diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs
index 30025712..44493298 100644
--- a/ChocolArm64/AOpCodeTable.cs
+++ b/ChocolArm64/AOpCodeTable.cs
@@ -284,11 +284,12 @@ namespace ChocolArm64
SetA64("000111100x1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S, typeof(AOpCodeSimdReg));
SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", AInstEmit.Fminnm_V, typeof(AOpCodeSimdReg));
SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fminp_V, typeof(AOpCodeSimdReg));
- SetA64("010111111<<xxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Se, typeof(AOpCodeSimdRegElemF));
+ SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V, typeof(AOpCodeSimdReg));
- SetA64("0x0011111<<xxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Ve, typeof(AOpCodeSimdRegElemF));
+ SetA64("0>0011111<xxxxxx0001x0xxxxxxxxxx", AInstEmit.Fmla_Ve, typeof(AOpCodeSimdRegElemF));
+ SetA64("010111111xxxxxxx0101x0xxxxxxxxxx", AInstEmit.Fmls_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmls_V, typeof(AOpCodeSimdReg));
- SetA64("0x0011111<<xxxxx0101x0xxxxxxxxxx", AInstEmit.Fmls_Ve, typeof(AOpCodeSimdRegElemF));
+ SetA64("0>0011111<xxxxxx0101x0xxxxxxxxxx", AInstEmit.Fmls_Ve, typeof(AOpCodeSimdRegElemF));
SetA64("000111100x100000010000xxxxxxxxxx", AInstEmit.Fmov_S, typeof(AOpCodeSimd));
SetA64("00011110xx1xxxxxxxx100xxxxxxxxxx", AInstEmit.Fmov_Si, typeof(AOpCodeSimdFmov));
SetA64("0xx0111100000xxx111101xxxxxxxxxx", AInstEmit.Fmov_V, typeof(AOpCodeSimdImm));
@@ -298,11 +299,13 @@ namespace ChocolArm64
SetA64("1001111010101111000000xxxxxxxxxx", AInstEmit.Fmov_Itof1, typeof(AOpCodeSimdCvt));
SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", AInstEmit.Fmsub_S, typeof(AOpCodeSimdReg));
SetA64("000111100x1xxxxx000010xxxxxxxxxx", AInstEmit.Fmul_S, typeof(AOpCodeSimdReg));
- SetA64("010111111<<xxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Se, typeof(AOpCodeSimdRegElemF));
+ SetA64("010111111xxxxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>1011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V, typeof(AOpCodeSimdReg));
- SetA64("0x0011111<<xxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Ve, typeof(AOpCodeSimdRegElemF));
+ SetA64("0>0011111<xxxxxx1001x0xxxxxxxxxx", AInstEmit.Fmul_Ve, typeof(AOpCodeSimdRegElemF));
SetA64("010111100x1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_S, typeof(AOpCodeSimdReg));
+ SetA64("011111111xxxxxxx1001x0xxxxxxxxxx", AInstEmit.Fmulx_Se, typeof(AOpCodeSimdRegElemF));
SetA64("0>0011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_V, typeof(AOpCodeSimdReg));
+ SetA64("0>1011111<xxxxxx1001x0xxxxxxxxxx", AInstEmit.Fmulx_Ve, typeof(AOpCodeSimdRegElemF));
SetA64("000111100x100001010000xxxxxxxxxx", AInstEmit.Fneg_S, typeof(AOpCodeSimd));
SetA64("0>1011101<100000111110xxxxxxxxxx", AInstEmit.Fneg_V, typeof(AOpCodeSimd));
SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fnmadd_S, typeof(AOpCodeSimdReg));
@@ -401,6 +404,7 @@ namespace ChocolArm64
SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg));
SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, typeof(AOpCodeSimdReg));
+ SetA64("0x001110000xxxxx001011xxxxxxxxxx", AInstEmit.Smov_S, typeof(AOpCodeSimdIns));
SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg));
SetA64("01011110xx100000011110xxxxxxxxxx", AInstEmit.Sqabs_S, typeof(AOpCodeSimd));
SetA64("0>001110<<100000011110xxxxxxxxxx", AInstEmit.Sqabs_V, typeof(AOpCodeSimd));
diff --git a/ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs b/ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs
index e61d7093..e0670def 100644
--- a/ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs
+++ b/ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs
@@ -8,15 +8,26 @@ namespace ChocolArm64.Decoder
public AOpCodeSimdRegElemF(AInst Inst, long Position, int OpCode) : base(Inst, Position, OpCode)
{
- if ((Size & 1) != 0)
+ switch ((OpCode >> 21) & 3) // sz:L
{
- Index = (OpCode >> 11) & 1;
- }
- else
- {
- Index = (OpCode >> 21) & 1 |
- (OpCode >> 10) & 2;
+ case 0: // H:0
+ Index = (OpCode >> 10) & 2; // 0, 2
+
+ break;
+
+ case 1: // H:1
+ Index = (OpCode >> 10) & 2;
+ Index++; // 1, 3
+
+ break;
+
+ case 2: // H
+ Index = (OpCode >> 11) & 1; // 0, 1
+
+ break;
+
+ default: Emitter = AInstEmit.Und; return;
}
}
}
-} \ No newline at end of file
+}
diff --git a/ChocolArm64/Instruction/AInstEmitAlu.cs b/ChocolArm64/Instruction/AInstEmitAlu.cs
index 490387e1..4551346b 100644
--- a/ChocolArm64/Instruction/AInstEmitAlu.cs
+++ b/ChocolArm64/Instruction/AInstEmitAlu.cs
@@ -4,6 +4,7 @@ using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
+using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitAluHelper;
@@ -117,9 +118,18 @@ namespace ChocolArm64.Instruction
Context.EmitLdintzr(Op.Rn);
- Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64);
+ if (Lzcnt.IsSupported)
+ {
+ Type TValue = Op.RegisterSize == ARegisterSize.Int32 ? typeof(uint) : typeof(ulong);
- ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros));
+ Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { TValue }));
+ }
+ else
+ {
+ Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64);
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros));
+ }
Context.EmitStintzr(Op.Rd);
}
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index d11a0b84..7ba08f5e 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -83,19 +83,31 @@ namespace ChocolArm64.Instruction
public static void Cls_V(AILEmitterCtx Context)
{
- MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingSigns));
+ AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
- EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo));
- }
+ int Bytes = Op.GetBitsCount() >> 3;
+ int Elems = Bytes >> Op.Size;
- public static void Clz_V(AILEmitterCtx Context)
- {
- MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingZeros));
+ int ESize = 8 << Op.Size;
- EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo));
+ for (int Index = 0; Index < Elems; Index++)
+ {
+ EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
+
+ Context.EmitLdc_I4(ESize);
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingSigns));
+
+ EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+ }
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
}
- private static void EmitCountLeadingBits(AILEmitterCtx Context, Action Emit)
+ public static void Clz_V(AILEmitterCtx Context)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
@@ -108,9 +120,20 @@ namespace ChocolArm64.Instruction
{
EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
- Context.EmitLdc_I4(ESize);
+ if (Lzcnt.IsSupported && ESize == 32)
+ {
+ Context.Emit(OpCodes.Conv_U4);
- Emit();
+ Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { typeof(uint) }));
+
+ Context.Emit(OpCodes.Conv_U8);
+ }
+ else
+ {
+ Context.EmitLdc_I4(ESize);
+
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros));
+ }
EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
}
@@ -131,11 +154,14 @@ namespace ChocolArm64.Instruction
{
EmitVectorExtractZx(Context, Op.Rn, Index, 0);
- Context.Emit(OpCodes.Conv_U4);
-
- ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
-
- Context.Emit(OpCodes.Conv_U8);
+ if (Popcnt.IsSupported)
+ {
+ Context.EmitCall(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { typeof(ulong) }));
+ }
+ else
+ {
+ ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8));
+ }
EmitVectorInsert(Context, Op.Rd, Index, 0);
}
@@ -440,6 +466,15 @@ namespace ChocolArm64.Instruction
});
}
+ public static void Fmls_Se(AILEmitterCtx Context)
+ {
+ EmitScalarTernaryOpByElemF(Context, () =>
+ {
+ Context.Emit(OpCodes.Mul);
+ Context.Emit(OpCodes.Sub);
+ });
+ }
+
public static void Fmls_V(AILEmitterCtx Context)
{
EmitVectorTernaryOpF(Context, () =>
@@ -554,6 +589,14 @@ namespace ChocolArm64.Instruction
});
}
+ public static void Fmulx_Se(AILEmitterCtx Context)
+ {
+ EmitScalarBinaryOpByElemF(Context, () =>
+ {
+ EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX));
+ });
+ }
+
public static void Fmulx_V(AILEmitterCtx Context)
{
EmitVectorBinaryOpF(Context, () =>
@@ -562,6 +605,14 @@ namespace ChocolArm64.Instruction
});
}
+ public static void Fmulx_Ve(AILEmitterCtx Context)
+ {
+ EmitVectorBinaryOpByElemF(Context, () =>
+ {
+ EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX));
+ });
+ }
+
public static void Fneg_S(AILEmitterCtx Context)
{
EmitScalarUnaryOpF(Context, () => Context.Emit(OpCodes.Neg));
diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
index 94097f48..6001f48c 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
@@ -249,6 +249,17 @@ namespace ChocolArm64.Instruction
EmitVectorImmUnaryOp(Context, () => Context.Emit(OpCodes.Not));
}
+ public static void Smov_S(AILEmitterCtx Context)
+ {
+ AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
+
+ EmitVectorExtractSx(Context, Op.Rn, Op.DstIndex, Op.Size);
+
+ EmitIntZeroUpperIfNeeded(Context);
+
+ Context.EmitStintzr(Op.Rd);
+ }
+
public static void Tbl_V(AILEmitterCtx Context)
{
AOpCodeSimdTbl Op = (AOpCodeSimdTbl)Context.CurrOp;
@@ -421,7 +432,8 @@ namespace ChocolArm64.Instruction
private static void EmitIntZeroUpperIfNeeded(AILEmitterCtx Context)
{
- if (Context.CurrOp.RegisterSize == ARegisterSize.Int32)
+ if (Context.CurrOp.RegisterSize == ARegisterSize.Int32 ||
+ Context.CurrOp.RegisterSize == ARegisterSize.SIMD64)
{
Context.Emit(OpCodes.Conv_U4);
Context.Emit(OpCodes.Conv_U8);
diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs
index 127abf1d..8918c0e1 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs
@@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
+using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@@ -31,12 +32,32 @@ namespace ChocolArm64.Instruction
{
AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
- EmitVectorUnaryOpZx(Context, () =>
+ if (AOptimizations.UseSse2 && Op.Size > 0)
{
+ Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
+
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
+
Context.EmitLdc_I4(GetImmShl(Op));
- Context.Emit(OpCodes.Shl);
- });
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), Types));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorUnaryOpZx(Context, () =>
+ {
+ Context.EmitLdc_I4(GetImmShl(Op));
+
+ Context.Emit(OpCodes.Shl);
+ });
+ }
}
public static void Shll_V(AILEmitterCtx Context)
@@ -167,7 +188,30 @@ namespace ChocolArm64.Instruction
public static void Sshr_V(AILEmitterCtx Context)
{
- EmitShrImmOp(Context, ShrImmFlags.VectorSx);
+ AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+
+ if (AOptimizations.UseSse2 && Op.Size > 0
+ && Op.Size < 3)
+ {
+ Type[] Types = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
+
+ EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
+
+ Context.EmitLdc_I4(GetImmShr(Op));
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), Types));
+
+ EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitShrImmOp(Context, ShrImmFlags.VectorSx);
+ }
}
public static void Ssra_S(AILEmitterCtx Context)
@@ -177,7 +221,33 @@ namespace ChocolArm64.Instruction
public static void Ssra_V(AILEmitterCtx Context)
{
- EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate);
+ AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+
+ if (AOptimizations.UseSse2 && Op.Size > 0
+ && Op.Size < 3)
+ {
+ Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) };
+ Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] };
+
+ EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size);
+ EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
+
+ Context.EmitLdc_I4(GetImmShr(Op));
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra));
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
+
+ EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate);
+ }
}
public static void Uqrshrn_S(AILEmitterCtx Context)
@@ -239,7 +309,29 @@ namespace ChocolArm64.Instruction
public static void Ushr_V(AILEmitterCtx Context)
{
- EmitShrImmOp(Context, ShrImmFlags.VectorZx);
+ AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+
+ if (AOptimizations.UseSse2 && Op.Size > 0)
+ {
+ Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
+
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
+
+ Context.EmitLdc_I4(GetImmShr(Op));
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), Types));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitShrImmOp(Context, ShrImmFlags.VectorZx);
+ }
}
public static void Usra_S(AILEmitterCtx Context)
@@ -249,7 +341,32 @@ namespace ChocolArm64.Instruction
public static void Usra_V(AILEmitterCtx Context)
{
- EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate);
+ AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp;
+
+ if (AOptimizations.UseSse2 && Op.Size > 0)
+ {
+ Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
+ Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] };
+
+ EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
+
+ Context.EmitLdc_I4(GetImmShr(Op));
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl));
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate);
+ }
}
private static void EmitVectorShl(AILEmitterCtx Context, bool Signed)
diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs
index a7bc1085..3c5c5c4d 100644
--- a/ChocolArm64/Instruction/ASoftFallback.cs
+++ b/ChocolArm64/Instruction/ASoftFallback.cs
@@ -386,7 +386,7 @@ namespace ChocolArm64.Instruction
#endregion
#region "Count"
- public static ulong CountLeadingSigns(ulong Value, int Size)
+ public static ulong CountLeadingSigns(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
{
Value ^= Value >> 1;
@@ -405,9 +405,9 @@ namespace ChocolArm64.Instruction
private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
- public static ulong CountLeadingZeros(ulong Value, int Size)
+ public static ulong CountLeadingZeros(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.).
{
- if (Value == 0)
+ if (Value == 0ul)
{
return (ulong)Size;
}
@@ -426,12 +426,17 @@ namespace ChocolArm64.Instruction
return (ulong)Count;
}
- public static uint CountSetBits8(uint Value)
+ public static ulong CountSetBits8(ulong Value) // "Size" is 8 (SIMD&FP Inst.).
{
- Value = ((Value >> 1) & 0x55) + (Value & 0x55);
- Value = ((Value >> 2) & 0x33) + (Value & 0x33);
+ if (Value == 0xfful)
+ {
+ return 8ul;
+ }
+
+ Value = ((Value >> 1) & 0x55ul) + (Value & 0x55ul);
+ Value = ((Value >> 2) & 0x33ul) + (Value & 0x33ul);
- return (Value >> 4) + (Value & 0x0f);
+ return (Value >> 4) + (Value & 0x0ful);
}
#endregion
diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs
index 7412c976..2d9a9f0e 100644
--- a/ChocolArm64/Instruction/ASoftFloat.cs
+++ b/ChocolArm64/Instruction/ASoftFloat.cs
@@ -365,8 +365,8 @@ namespace ChocolArm64.Instruction
{
Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMaxNum: ");
- Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1);
- Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2);
+ Value1.FPUnpack(out FPType Type1, out _, out _);
+ Value2.FPUnpack(out FPType Type2, out _, out _);
if (Type1 == FPType.QNaN && Type2 != FPType.QNaN)
{
@@ -430,8 +430,8 @@ namespace ChocolArm64.Instruction
{
Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMinNum: ");
- Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1);
- Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2);
+ Value1.FPUnpack(out FPType Type1, out _, out _);
+ Value2.FPUnpack(out FPType Type2, out _, out _);
if (Type1 == FPType.QNaN && Type2 != FPType.QNaN)
{
@@ -1091,8 +1091,8 @@ namespace ChocolArm64.Instruction
{
Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMaxNum: ");
- Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1);
- Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2);
+ Value1.FPUnpack(out FPType Type1, out _, out _);
+ Value2.FPUnpack(out FPType Type2, out _, out _);
if (Type1 == FPType.QNaN && Type2 != FPType.QNaN)
{
@@ -1156,8 +1156,8 @@ namespace ChocolArm64.Instruction
{
Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMinNum: ");
- Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1);
- Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2);
+ Value1.FPUnpack(out FPType Type1, out _, out _);
+ Value2.FPUnpack(out FPType Type2, out _, out _);
if (Type1 == FPType.QNaN && Type2 != FPType.QNaN)
{
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
index 2075ccf2..279f9f0c 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs
@@ -205,6 +205,22 @@ namespace Ryujinx.Tests.Cpu
#endregion
#region "ValueSource (Opcodes)"
+ private static uint[] _F_Cvt_S_SD_()
+ {
+ return new uint[]
+ {
+ 0x1E22C020u // FCVT D0, S1
+ };
+ }
+
+ private static uint[] _F_Cvt_S_DS_()
+ {
+ return new uint[]
+ {
+ 0x1E624020u // FCVT S0, D1
+ };
+ }
+
private static uint[] _F_Cvt_NZ_SU_S_S_()
{
return new uint[]
@@ -249,7 +265,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _F_RecpX_Sqrt_S_S_()
+ private static uint[] _F_Recpx_Sqrt_S_S_()
{
return new uint[]
{
@@ -258,7 +274,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _F_RecpX_Sqrt_S_D_()
+ private static uint[] _F_Recpx_Sqrt_S_D_()
{
return new uint[]
{
@@ -785,35 +801,33 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
- [Test, Pairwise, Description("FCVT <Dd>, <Sn>")]
- public void Fcvt_S_SD([ValueSource("_1S_F_")] ulong A)
+ [Test, Pairwise] [Explicit]
+ public void F_Cvt_S_SD([ValueSource("_F_Cvt_S_SD_")] uint Opcodes,
+ [ValueSource("_1S_F_")] ulong A)
{
- uint Opcode = 0x1E22C020; // FCVT D0, S1
-
ulong Z = TestContext.CurrentContext.Random.NextULong();
Vector128<float> V0 = MakeVectorE1(Z);
Vector128<float> V1 = MakeVectorE0(A);
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1);
CompareAgainstUnicorn();
}
- [Test, Pairwise, Description("FCVT <Sd>, <Dn>")]
- public void Fcvt_S_DS([ValueSource("_1D_F_")] ulong A)
+ [Test, Pairwise] [Explicit]
+ public void F_Cvt_S_DS([ValueSource("_F_Cvt_S_DS_")] uint Opcodes,
+ [ValueSource("_1D_F_")] ulong A)
{
- uint Opcode = 0x1E624020; // FCVT S0, D1
-
ulong Z = TestContext.CurrentContext.Random.NextULong();
Vector128<float> V0 = MakeVectorE0E1(Z, Z);
Vector128<float> V1 = MakeVectorE0(A);
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1);
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1);
CompareAgainstUnicorn();
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Cvt_NZ_SU_S_S([ValueSource("_F_Cvt_NZ_SU_S_S_")] uint Opcodes,
[ValueSource("_1S_F_")] ulong A)
{
@@ -826,7 +840,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Cvt_NZ_SU_S_D([ValueSource("_F_Cvt_NZ_SU_S_D_")] uint Opcodes,
[ValueSource("_1D_F_")] ulong A)
{
@@ -839,7 +853,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Cvt_NZ_SU_V_2S_4S([ValueSource("_F_Cvt_NZ_SU_V_2S_4S_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
@@ -858,7 +872,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Cvt_NZ_SU_V_2D([ValueSource("_F_Cvt_NZ_SU_V_2D_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
@@ -875,8 +889,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
- [Test, Pairwise]
- public void F_RecpX_Sqrt_S_S([ValueSource("_F_RecpX_Sqrt_S_S_")] uint Opcodes,
+ [Test, Pairwise] [Explicit]
+ public void F_Recpx_Sqrt_S_S([ValueSource("_F_Recpx_Sqrt_S_S_")] uint Opcodes,
[ValueSource("_1S_F_")] ulong A)
{
ulong Z = TestContext.CurrentContext.Random.NextULong();
@@ -890,8 +904,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC);
}
- [Test, Pairwise]
- public void F_RecpX_Sqrt_S_D([ValueSource("_F_RecpX_Sqrt_S_D_")] uint Opcodes,
+ [Test, Pairwise] [Explicit]
+ public void F_Recpx_Sqrt_S_D([ValueSource("_F_Recpx_Sqrt_S_D_")] uint Opcodes,
[ValueSource("_1D_F_")] ulong A)
{
ulong Z = TestContext.CurrentContext.Random.NextULong();
@@ -905,7 +919,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC);
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Sqrt_V_2S_4S([ValueSource("_F_Sqrt_V_2S_4S_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
@@ -926,7 +940,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC);
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Sqrt_V_2D([ValueSource("_F_Sqrt_V_2D_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
index a5ae1a5f..5afeab31 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
@@ -9,18 +9,6 @@ namespace Ryujinx.Tests.Cpu
{
public class CpuTestSimdArithmetic : CpuTest
{
- [Test, Description("FMUL S6, S1, V0.S[2]")]
- public void Fmul_Se([Random(10)] float A, [Random(10)] float B)
- {
- AThreadState ThreadState = SingleOpcode(0x5F809826,
- V1: Sse.SetVector128(0, 0, 0, A),
- V0: Sse.SetVector128(0, B, 0, 0));
-
- Assert.That(Sse41.Extract(ThreadState.V6, (byte)0), Is.EqualTo(A * B));
-
- CompareAgainstUnicorn();
- }
-
[TestCase(0x00000000u, 0x7F800000u)]
[TestCase(0x80000000u, 0xFF800000u)]
[TestCase(0x00FFF000u, 0x7E000000u)]
@@ -86,7 +74,7 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> V1 = MakeVectorE0(A);
int FpcrTemp = 0x0;
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp = 0x2000000;
}
@@ -115,7 +103,7 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> V1 = MakeVectorE0E1(A, B);
int FpcrTemp = 0x0;
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp = 0x2000000;
}
@@ -185,7 +173,7 @@ namespace Ryujinx.Tests.Cpu
case 'M': FpcrTemp = 0x800000; break;
case 'Z': FpcrTemp = 0xC00000; break;
}
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp |= 1 << 25;
}
@@ -241,7 +229,7 @@ namespace Ryujinx.Tests.Cpu
case 'M': FpcrTemp = 0x800000; break;
case 'Z': FpcrTemp = 0xC00000; break;
}
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp |= 1 << 25;
}
@@ -302,7 +290,7 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> V1 = MakeVectorE0(A);
int FpcrTemp = 0x0;
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp = 0x2000000;
}
@@ -327,7 +315,7 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> V1 = MakeVectorE0E1(A, B);
int FpcrTemp = 0x0;
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp = 0x2000000;
}
@@ -389,7 +377,7 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> V1 = MakeVectorE0(A);
int FpcrTemp = 0x0;
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp = 0x2000000;
}
@@ -417,7 +405,7 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> V1 = MakeVectorE0E1(A, B);
int FpcrTemp = 0x0;
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp = 0x2000000;
}
@@ -478,7 +466,7 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> V1 = MakeVectorE0(A);
int FpcrTemp = 0x0;
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp = 0x2000000;
}
@@ -503,7 +491,7 @@ namespace Ryujinx.Tests.Cpu
Vector128<float> V1 = MakeVectorE0E1(A, B);
int FpcrTemp = 0x0;
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp = 0x2000000;
}
@@ -573,7 +561,7 @@ namespace Ryujinx.Tests.Cpu
case 'M': FpcrTemp = 0x800000; break;
case 'Z': FpcrTemp = 0xC00000; break;
}
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp |= 1 << 25;
}
@@ -629,7 +617,7 @@ namespace Ryujinx.Tests.Cpu
case 'M': FpcrTemp = 0x800000; break;
case 'Z': FpcrTemp = 0xC00000; break;
}
- if(DefaultNaN)
+ if (DefaultNaN)
{
FpcrTemp |= 1 << 25;
}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
index 387cdf5d..0b227edb 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs
@@ -14,6 +14,27 @@ namespace Ryujinx.Tests.Cpu
#if SimdIns
#region "ValueSource"
+ private static ulong[] _1D_()
+ {
+ return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul,
+ 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul };
+ }
+
+ private static ulong[] _8B4H_()
+ {
+ return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
+ 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul,
+ 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul };
+ }
+
+ private static ulong[] _8B4H2S_()
+ {
+ return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful,
+ 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul,
+ 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul,
+ 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul };
+ }
+
private static uint[] _W_()
{
return new uint[] { 0x00000000u, 0x0000007Fu,
@@ -39,7 +60,7 @@ namespace Ryujinx.Tests.Cpu
[Values(0, 1, 2)] int Size, // Q0: <8B, 4H, 2S>
[Values(0b0u, 0b1u)] uint Q) // Q1: <16B, 8H, 4S>
{
- uint Imm5 = (1U << Size) & 0x1F;
+ uint Imm5 = (1u << Size) & 0x1Fu;
uint Opcode = 0x0E000C00; // RESERVED
Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
@@ -69,6 +90,92 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
+
+ [Test, Pairwise, Description("SMOV <Wd>, <Vn>.<Ts>[<index>]")]
+ public void Smov_S_W([Values(0u, 31u)] uint Rd,
+ [Values(1u)] uint Rn,
+ [ValueSource("_8B4H_")] [Random(RndCnt)] ulong A,
+ [Values(0, 1)] int Size, // <B, H>
+ [Values(0u, 1u, 2u, 3u)] uint Index)
+ {
+ uint Imm5 = (Index << (Size + 1) | 1u << Size) & 0x1Fu;
+
+ uint Opcode = 0x0E002C00; // RESERVED
+ Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcode |= (Imm5 << 16);
+
+ ulong _X0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
+ uint _W31 = TestContext.CurrentContext.Random.NextUInt();
+ Vector128<float> V1 = MakeVectorE0(A);
+
+ AThreadState ThreadState = SingleOpcode(Opcode, X0: _X0, X31: _W31, V1: V1);
+
+ CompareAgainstUnicorn();
+ }
+
+ [Test, Pairwise, Description("SMOV <Xd>, <Vn>.<Ts>[<index>]")]
+ public void Smov_S_X([Values(0u, 31u)] uint Rd,
+ [Values(1u)] uint Rn,
+ [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+ [Values(0, 1, 2)] int Size, // <B, H, S>
+ [Values(0u, 1u)] uint Index)
+ {
+ uint Imm5 = (Index << (Size + 1) | 1u << Size) & 0x1Fu;
+
+ uint Opcode = 0x4E002C00; // RESERVED
+ Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcode |= (Imm5 << 16);
+
+ ulong _X31 = TestContext.CurrentContext.Random.NextULong();
+ Vector128<float> V1 = MakeVectorE0(A);
+
+ AThreadState ThreadState = SingleOpcode(Opcode, X31: _X31, V1: V1);
+
+ CompareAgainstUnicorn();
+ }
+
+ [Test, Pairwise, Description("UMOV <Wd>, <Vn>.<Ts>[<index>]")]
+ public void Umov_S_W([Values(0u, 31u)] uint Rd,
+ [Values(1u)] uint Rn,
+ [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A,
+ [Values(0, 1, 2)] int Size, // <B, H, S>
+ [Values(0u, 1u)] uint Index)
+ {
+ uint Imm5 = (Index << (Size + 1) | 1u << Size) & 0x1Fu;
+
+ uint Opcode = 0x0E003C00; // RESERVED
+ Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcode |= (Imm5 << 16);
+
+ ulong _X0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32;
+ uint _W31 = TestContext.CurrentContext.Random.NextUInt();
+ Vector128<float> V1 = MakeVectorE0(A);
+
+ AThreadState ThreadState = SingleOpcode(Opcode, X0: _X0, X31: _W31, V1: V1);
+
+ CompareAgainstUnicorn();
+ }
+
+ [Test, Pairwise, Description("UMOV <Xd>, <Vn>.<Ts>[<index>]")]
+ public void Umov_S_X([Values(0u, 31u)] uint Rd,
+ [Values(1u)] uint Rn,
+ [ValueSource("_1D_")] [Random(RndCnt)] ulong A,
+ [Values(3)] int Size, // <D>
+ [Values(0u)] uint Index)
+ {
+ uint Imm5 = (Index << (Size + 1) | 1u << Size) & 0x1Fu;
+
+ uint Opcode = 0x4E003C00; // RESERVED
+ Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcode |= (Imm5 << 16);
+
+ ulong _X31 = TestContext.CurrentContext.Random.NextULong();
+ Vector128<float> V1 = MakeVectorE0(A);
+
+ AThreadState ThreadState = SingleOpcode(Opcode, X31: _X31, V1: V1);
+
+ CompareAgainstUnicorn();
+ }
#endif
}
}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
index 7d47416f..1ea017c8 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs
@@ -204,7 +204,7 @@ namespace Ryujinx.Tests.Cpu
#endregion
#region "ValueSource (Opcodes)"
- private static uint[] _F_Add_Div_Mul_MulX_Sub_S_S_()
+ private static uint[] _F_Add_Div_Mul_Mulx_Sub_S_S_()
{
return new uint[]
{
@@ -216,7 +216,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _F_Add_Div_Mul_MulX_Sub_S_D_()
+ private static uint[] _F_Add_Div_Mul_Mulx_Sub_S_D_()
{
return new uint[]
{
@@ -228,7 +228,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _F_Add_Div_Mul_MulX_Sub_V_2S_4S_()
+ private static uint[] _F_Add_Div_Mul_Mulx_Sub_V_2S_4S_()
{
return new uint[]
{
@@ -240,7 +240,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _F_Add_Div_Mul_MulX_Sub_V_2D_()
+ private static uint[] _F_Add_Div_Mul_Mulx_Sub_V_2D_()
{
return new uint[]
{
@@ -252,7 +252,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _Fmadd_Fmsub_S_S_()
+ private static uint[] _F_Madd_Msub_S_S_()
{
return new uint[]
{
@@ -261,7 +261,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _Fmadd_Fmsub_S_D_()
+ private static uint[] _F_Madd_Msub_S_D_()
{
return new uint[]
{
@@ -318,7 +318,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _Frecps_Frsqrts_S_S_()
+ private static uint[] _F_Recps_Rsqrts_S_S_()
{
return new uint[]
{
@@ -327,7 +327,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _Frecps_Frsqrts_S_D_()
+ private static uint[] _F_Recps_Rsqrts_S_D_()
{
return new uint[]
{
@@ -336,7 +336,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _Frecps_Frsqrts_V_2S_4S_()
+ private static uint[] _F_Recps_Rsqrts_V_2S_4S_()
{
return new uint[]
{
@@ -345,7 +345,7 @@ namespace Ryujinx.Tests.Cpu
};
}
- private static uint[] _Frecps_Frsqrts_V_2D_()
+ private static uint[] _F_Recps_Rsqrts_V_2D_()
{
return new uint[]
{
@@ -1137,8 +1137,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn();
}
- [Test, Pairwise]
- public void F_Add_Div_Mul_MulX_Sub_S_S([ValueSource("_F_Add_Div_Mul_MulX_Sub_S_S_")] uint Opcodes,
+ [Test, Pairwise] [Explicit]
+ public void F_Add_Div_Mul_Mulx_Sub_S_S([ValueSource("_F_Add_Div_Mul_Mulx_Sub_S_S_")] uint Opcodes,
[ValueSource("_1S_F_")] ulong A,
[ValueSource("_1S_F_")] ulong B)
{
@@ -1154,8 +1154,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC);
}
- [Test, Pairwise]
- public void F_Add_Div_Mul_MulX_Sub_S_D([ValueSource("_F_Add_Div_Mul_MulX_Sub_S_D_")] uint Opcodes,
+ [Test, Pairwise] [Explicit]
+ public void F_Add_Div_Mul_Mulx_Sub_S_D([ValueSource("_F_Add_Div_Mul_Mulx_Sub_S_D_")] uint Opcodes,
[ValueSource("_1D_F_")] ulong A,
[ValueSource("_1D_F_")] ulong B)
{
@@ -1171,8 +1171,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC);
}
- [Test, Pairwise]
- public void F_Add_Div_Mul_MulX_Sub_V_2S_4S([ValueSource("_F_Add_Div_Mul_MulX_Sub_V_2S_4S_")] uint Opcodes,
+ [Test, Pairwise] [Explicit]
+ public void F_Add_Div_Mul_Mulx_Sub_V_2S_4S([ValueSource("_F_Add_Div_Mul_Mulx_Sub_V_2S_4S_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
[Values(2u, 0u)] uint Rm,
@@ -1195,8 +1195,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC);
}
- [Test, Pairwise]
- public void F_Add_Div_Mul_MulX_Sub_V_2D([ValueSource("_F_Add_Div_Mul_MulX_Sub_V_2D_")] uint Opcodes,
+ [Test, Pairwise] [Explicit]
+ public void F_Add_Div_Mul_Mulx_Sub_V_2D([ValueSource("_F_Add_Div_Mul_Mulx_Sub_V_2D_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
[Values(2u, 0u)] uint Rm,
@@ -1217,8 +1217,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC);
}
- [Test, Pairwise] // Fused.
- public void Fmadd_Fmsub_S_S([ValueSource("_Fmadd_Fmsub_S_S_")] uint Opcodes,
+ [Test, Pairwise] [Explicit] // Fused.
+ public void F_Madd_Msub_S_S([ValueSource("_F_Madd_Msub_S_S_")] uint Opcodes,
[ValueSource("_1S_F_")] ulong A,
[ValueSource("_1S_F_")] ulong B,
[ValueSource("_1S_F_")] ulong C)
@@ -1236,8 +1236,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S);
}
- [Test, Pairwise] // Fused.
- public void Fmadd_Fmsub_S_D([ValueSource("_Fmadd_Fmsub_S_D_")] uint Opcodes,
+ [Test, Pairwise] [Explicit] // Fused.
+ public void F_Madd_Msub_S_D([ValueSource("_F_Madd_Msub_S_D_")] uint Opcodes,
[ValueSource("_1D_F_")] ulong A,
[ValueSource("_1D_F_")] ulong B,
[ValueSource("_1D_F_")] ulong C)
@@ -1255,7 +1255,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D);
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Max_Min_Nm_S_S([ValueSource("_F_Max_Min_Nm_S_S_")] uint Opcodes,
[ValueSource("_1S_F_")] ulong A,
[ValueSource("_1S_F_")] ulong B)
@@ -1272,7 +1272,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC);
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Max_Min_Nm_S_D([ValueSource("_F_Max_Min_Nm_S_D_")] uint Opcodes,
[ValueSource("_1D_F_")] ulong A,
[ValueSource("_1D_F_")] ulong B)
@@ -1289,7 +1289,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC);
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Max_Min_Nm_P_V_2S_4S([ValueSource("_F_Max_Min_Nm_P_V_2S_4S_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
@@ -1313,7 +1313,7 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC);
}
- [Test, Pairwise]
+ [Test, Pairwise] [Explicit]
public void F_Max_Min_Nm_P_V_2D([ValueSource("_F_Max_Min_Nm_P_V_2D_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
@@ -1335,8 +1335,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FpsrMask: FPSR.IOC);
}
- [Test, Pairwise] // Fused.
- public void Frecps_Frsqrts_S_S([ValueSource("_Frecps_Frsqrts_S_S_")] uint Opcodes,
+ [Test, Pairwise] [Explicit] // Fused.
+ public void F_Recps_Rsqrts_S_S([ValueSource("_F_Recps_Rsqrts_S_S_")] uint Opcodes,
[ValueSource("_1S_F_")] ulong A,
[ValueSource("_1S_F_")] ulong B)
{
@@ -1352,8 +1352,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S);
}
- [Test, Pairwise] // Fused.
- public void Frecps_Frsqrts_S_D([ValueSource("_Frecps_Frsqrts_S_D_")] uint Opcodes,
+ [Test, Pairwise] [Explicit] // Fused.
+ public void F_Recps_Rsqrts_S_D([ValueSource("_F_Recps_Rsqrts_S_D_")] uint Opcodes,
[ValueSource("_1D_F_")] ulong A,
[ValueSource("_1D_F_")] ulong B)
{
@@ -1369,8 +1369,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D);
}
- [Test, Pairwise] // Fused.
- public void Frecps_Frsqrts_V_2S_4S([ValueSource("_Frecps_Frsqrts_V_2S_4S_")] uint Opcodes,
+ [Test, Pairwise] [Explicit] // Fused.
+ public void F_Recps_Rsqrts_V_2S_4S([ValueSource("_F_Recps_Rsqrts_V_2S_4S_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
[Values(2u, 0u)] uint Rm,
@@ -1393,8 +1393,8 @@ namespace Ryujinx.Tests.Cpu
CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S);
}
- [Test, Pairwise] // Fused.
- public void Frecps_Frsqrts_V_2D([ValueSource("_Frecps_Frsqrts_V_2D_")] uint Opcodes,
+ [Test, Pairwise] [Explicit] // Fused.
+ public void F_Recps_Rsqrts_V_2D([ValueSource("_F_Recps_Rsqrts_V_2D_")] uint Opcodes,
[Values(0u)] uint Rd,
[Values(1u, 0u)] uint Rn,
[Values(2u, 0u)] uint Rm,
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
index 4d14ab48..61552062 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs
@@ -13,7 +13,7 @@ namespace Ryujinx.Tests.Cpu
{
#if SimdRegElem
-#region "ValueSource"
+#region "ValueSource (Types)"
private static ulong[] _2S_()
{
return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul,
@@ -27,114 +27,81 @@ namespace Ryujinx.Tests.Cpu
}
#endregion
- private const int RndCnt = 2;
-
- [Test, Pairwise, Description("MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]")]
- public void Mla_Ve_4H_8H([Values(0u)] uint Rd,
- [Values(1u, 0u)] uint Rn,
- [Values(2u, 0u)] uint Rm,
- [ValueSource("_4H_")] [Random(RndCnt)] ulong Z,
- [ValueSource("_4H_")] [Random(RndCnt)] ulong A,
- [ValueSource("_4H_")] [Random(RndCnt)] ulong B,
- [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index,
- [Values(0b0u, 0b1u)] uint Q) // <4H, 8H>
+#region "ValueSource (Opcodes)"
+ private static uint[] _Mla_Mls_Mul_Ve_4H_8H_()
{
- uint H = (Index & 4) >> 2;
- uint L = (Index & 2) >> 1;
- uint M = (Index & 1) >> 0;
-
- uint Opcode = 0x2F400000; // MLA V0.4H, V0.4H, V0.H[0]
- Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
- Opcode |= (L << 21) | (M << 20) | (H << 11);
- Opcode |= ((Q & 1) << 30);
-
- Vector128<float> V0 = MakeVectorE0E1(Z, Z);
- Vector128<float> V1 = MakeVectorE0E1(A, A * Q);
- Vector128<float> V2 = MakeVectorE0E1(B, B * H);
-
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
- CompareAgainstUnicorn();
+ return new uint[]
+ {
+ 0x2F400000u, // MLA V0.4H, V0.4H, V0.H[0]
+ 0x2F404000u, // MLS V0.4H, V0.4H, V0.H[0]
+ 0x0F408000u // MUL V0.4H, V0.4H, V0.H[0]
+ };
}
- [Test, Pairwise, Description("MLA <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]")]
- public void Mla_Ve_2S_4S([Values(0u)] uint Rd,
- [Values(1u, 0u)] uint Rn,
- [Values(2u, 0u)] uint Rm,
- [ValueSource("_2S_")] [Random(RndCnt)] ulong Z,
- [ValueSource("_2S_")] [Random(RndCnt)] ulong A,
- [ValueSource("_2S_")] [Random(RndCnt)] ulong B,
- [Values(0u, 1u, 2u, 3u)] uint Index,
- [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
+ private static uint[] _Mla_Mls_Mul_Ve_2S_4S_()
{
- uint H = (Index & 2) >> 1;
- uint L = (Index & 1) >> 0;
-
- uint Opcode = 0x2F800000; // MLA V0.2S, V0.2S, V0.S[0]
- Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
- Opcode |= (L << 21) | (H << 11);
- Opcode |= ((Q & 1) << 30);
-
- Vector128<float> V0 = MakeVectorE0E1(Z, Z);
- Vector128<float> V1 = MakeVectorE0E1(A, A * Q);
- Vector128<float> V2 = MakeVectorE0E1(B, B * H);
-
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
-
- CompareAgainstUnicorn();
+ return new uint[]
+ {
+ 0x2F800000u, // MLA V0.2S, V0.2S, V0.S[0]
+ 0x2F804000u, // MLS V0.2S, V0.2S, V0.S[0]
+ 0x0F808000u // MUL V0.2S, V0.2S, V0.S[0]
+ };
}
+#endregion
+
+ private const int RndCnt = 2;
- [Test, Pairwise, Description("MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]")]
- public void Mls_Ve_4H_8H([Values(0u)] uint Rd,
- [Values(1u, 0u)] uint Rn,
- [Values(2u, 0u)] uint Rm,
- [ValueSource("_4H_")] [Random(RndCnt)] ulong Z,
- [ValueSource("_4H_")] [Random(RndCnt)] ulong A,
- [ValueSource("_4H_")] [Random(RndCnt)] ulong B,
- [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index,
- [Values(0b0u, 0b1u)] uint Q) // <4H, 8H>
+ [Test, Pairwise]
+ public void Mla_Mls_Mul_Ve_4H_8H([ValueSource("_Mla_Mls_Mul_Ve_4H_8H_")] uint Opcodes,
+ [Values(0u)] uint Rd,
+ [Values(1u, 0u)] uint Rn,
+ [Values(2u, 0u)] uint Rm,
+ [ValueSource("_4H_")] [Random(RndCnt)] ulong Z,
+ [ValueSource("_4H_")] [Random(RndCnt)] ulong A,
+ [ValueSource("_4H_")] [Random(RndCnt)] ulong B,
+ [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index,
+ [Values(0b0u, 0b1u)] uint Q) // <4H, 8H>
{
- uint H = (Index & 4) >> 2;
- uint L = (Index & 2) >> 1;
- uint M = (Index & 1) >> 0;
+ uint H = (Index >> 2) & 1;
+ uint L = (Index >> 1) & 1;
+ uint M = Index & 1;
- uint Opcode = 0x2F404000; // MLS V0.4H, V0.4H, V0.H[0]
- Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
- Opcode |= (L << 21) | (M << 20) | (H << 11);
- Opcode |= ((Q & 1) << 30);
+ Opcodes |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcodes |= (L << 21) | (M << 20) | (H << 11);
+ Opcodes |= ((Q & 1) << 30);
Vector128<float> V0 = MakeVectorE0E1(Z, Z);
Vector128<float> V1 = MakeVectorE0E1(A, A * Q);
Vector128<float> V2 = MakeVectorE0E1(B, B * H);
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2);
CompareAgainstUnicorn();
}
- [Test, Pairwise, Description("MLS <Vd>.<T>, <Vn>.<T>, <Vm>.<Ts>[<index>]")]
- public void Mls_Ve_2S_4S([Values(0u)] uint Rd,
- [Values(1u, 0u)] uint Rn,
- [Values(2u, 0u)] uint Rm,
- [ValueSource("_2S_")] [Random(RndCnt)] ulong Z,
- [ValueSource("_2S_")] [Random(RndCnt)] ulong A,
- [ValueSource("_2S_")] [Random(RndCnt)] ulong B,
- [Values(0u, 1u, 2u, 3u)] uint Index,
- [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
+ [Test, Pairwise]
+ public void Mla_Mls_Mul_Ve_2S_4S([ValueSource("_Mla_Mls_Mul_Ve_2S_4S_")] uint Opcodes,
+ [Values(0u)] uint Rd,
+ [Values(1u, 0u)] uint Rn,
+ [Values(2u, 0u)] uint Rm,
+ [ValueSource("_2S_")] [Random(RndCnt)] ulong Z,
+ [ValueSource("_2S_")] [Random(RndCnt)] ulong A,
+ [ValueSource("_2S_")] [Random(RndCnt)] ulong B,
+ [Values(0u, 1u, 2u, 3u)] uint Index,
+ [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
{
- uint H = (Index & 2) >> 1;
- uint L = (Index & 1) >> 0;
+ uint H = (Index >> 1) & 1;
+ uint L = Index & 1;
- uint Opcode = 0x2F804000; // MLS V0.2S, V0.2S, V0.S[0]
- Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
- Opcode |= (L << 21) | (H << 11);
- Opcode |= ((Q & 1) << 30);
+ Opcodes |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0);
+ Opcodes |= (L << 21) | (H << 11);
+ Opcodes |= ((Q & 1) << 30);
Vector128<float> V0 = MakeVectorE0E1(Z, Z);
Vector128<float> V1 = MakeVectorE0E1(A, A * Q);
Vector128<float> V2 = MakeVectorE0E1(B, B * H);
- AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2);
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2);
CompareAgainstUnicorn();
}
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs
new file mode 100644
index 00000000..3945cce1
--- /dev/null
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs
@@ -0,0 +1,424 @@
+#define SimdRegElemF
+
+using ChocolArm64.State;
+
+using NUnit.Framework;
+
+using System.Collections.Generic;
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Tests.Cpu
+{
+ [Category("SimdRegElemF")] // Tested: second half of 2018.
+ public sealed class CpuTestSimdRegElemF : CpuTest
+ {
+#if SimdRegElemF
+
+#region "ValueSource (Types)"
+ private static IEnumerable<ulong> _1S_F_() // Lazily yields 64-bit patterns whose low 32 bits hold single-precision edge cases, followed by random entries.
+ {
+ yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue)
+ yield return 0x0000000080800000ul; // -Min Normal
+ yield return 0x00000000807FFFFFul; // -Max Subnormal
+ yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon)
+ yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue)
+ yield return 0x0000000000800000ul; // +Min Normal
+ yield return 0x00000000007FFFFFul; // +Max Subnormal
+ yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon)
+
+ if (!NoZeros) // zero patterns are skipped when NoZeros is true
+ {
+ yield return 0x0000000080000000ul; // -Zero
+ yield return 0x0000000000000000ul; // +Zero
+ }
+
+ if (!NoInfs) // infinity patterns are skipped when NoInfs is true
+ {
+ yield return 0x00000000FF800000ul; // -Infinity
+ yield return 0x000000007F800000ul; // +Infinity
+ }
+
+ if (!NoNaNs) // NaN patterns are skipped when NoNaNs is true
+ {
+ yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN)
+ yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload)
+ yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN)
+ yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload)
+ }
+
+ for (int Cnt = 1; Cnt <= RndCnt; Cnt++) // RndCnt rounds, two random entries per round
+ {
+ ulong Grbg = TestContext.CurrentContext.Random.NextUInt(); // random 32 bits that become the upper word of both entries
+ ulong Rnd1 = GenNormal_S(); // NOTE(review): presumably a random normal single-precision bit pattern - helper not visible here
+ ulong Rnd2 = GenSubnormal_S(); // NOTE(review): presumably a random subnormal single-precision bit pattern - helper not visible here
+
+ yield return (Grbg << 32) | Rnd1; // Grbg shifted into bits 32-63, OR-ed with Rnd1
+ yield return (Grbg << 32) | Rnd2; // same upper word, OR-ed with Rnd2
+ }
+ }
+
+ private static IEnumerable<ulong> _2S_F_() // Lazily yields 64-bit patterns carrying the same single-precision edge case in both 32-bit halves, followed by random entries.
+ {
+ yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue)
+ yield return 0x8080000080800000ul; // -Min Normal
+ yield return 0x807FFFFF807FFFFFul; // -Max Subnormal
+ yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon)
+ yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue)
+ yield return 0x0080000000800000ul; // +Min Normal
+ yield return 0x007FFFFF007FFFFFul; // +Max Subnormal
+ yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon)
+
+ if (!NoZeros) // zero patterns are skipped when NoZeros is true
+ {
+ yield return 0x8000000080000000ul; // -Zero
+ yield return 0x0000000000000000ul; // +Zero
+ }
+
+ if (!NoInfs) // infinity patterns are skipped when NoInfs is true
+ {
+ yield return 0xFF800000FF800000ul; // -Infinity
+ yield return 0x7F8000007F800000ul; // +Infinity
+ }
+
+ if (!NoNaNs) // NaN patterns are skipped when NoNaNs is true
+ {
+ yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN)
+ yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload)
+ yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN)
+ yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload)
+ }
+
+ for (int Cnt = 1; Cnt <= RndCnt; Cnt++) // RndCnt rounds, two random entries per round
+ {
+ ulong Rnd1 = GenNormal_S(); // NOTE(review): presumably a random normal single-precision bit pattern - helper not visible here
+ ulong Rnd2 = GenSubnormal_S(); // NOTE(review): presumably a random subnormal single-precision bit pattern - helper not visible here
+
+ yield return (Rnd1 << 32) | Rnd1; // Rnd1 shifted into bits 32-63, OR-ed with itself
+ yield return (Rnd2 << 32) | Rnd2; // Rnd2 shifted into bits 32-63, OR-ed with itself
+ }
+ }
+
+ private static IEnumerable<ulong> _1D_F_() // Lazily yields 64-bit double-precision edge-case patterns, followed by random entries.
+ {
+ yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue)
+ yield return 0x8010000000000000ul; // -Min Normal
+ yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal
+ yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon)
+ yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue)
+ yield return 0x0010000000000000ul; // +Min Normal
+ yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal
+ yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon)
+
+ if (!NoZeros) // zero patterns are skipped when NoZeros is true
+ {
+ yield return 0x8000000000000000ul; // -Zero
+ yield return 0x0000000000000000ul; // +Zero
+ }
+
+ if (!NoInfs) // infinity patterns are skipped when NoInfs is true
+ {
+ yield return 0xFFF0000000000000ul; // -Infinity
+ yield return 0x7FF0000000000000ul; // +Infinity
+ }
+
+ if (!NoNaNs) // NaN patterns are skipped when NoNaNs is true
+ {
+ yield return 0xFFF8000000000000ul; // -QNaN (all zeros payload) (double.NaN)
+ yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload)
+ yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN)
+ yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload)
+ }
+
+ for (int Cnt = 1; Cnt <= RndCnt; Cnt++) // RndCnt rounds, two random entries per round
+ {
+ ulong Rnd1 = GenNormal_D(); // NOTE(review): presumably a random normal double-precision bit pattern - helper not visible here
+ ulong Rnd2 = GenSubnormal_D(); // NOTE(review): presumably a random subnormal double-precision bit pattern - helper not visible here
+
+ yield return Rnd1; // yielded unchanged
+ yield return Rnd2; // yielded unchanged
+ }
+ }
+#endregion
+
+#region "ValueSource (Opcodes)"
+ private static uint[] _F_Mla_Mls_Se_S_() // Base encodings named by the ValueSource of F_Mla_Mls_Se_S, which ORs the lane-index bits into them.
+ {
+ return new uint[] // a fresh two-element array on every call
+ {
+ 0x5F821020u, // FMLA S0, S1, V2.S[0]
+ 0x5F825020u // FMLS S0, S1, V2.S[0]
+ };
+ }
+
+ private static uint[] _F_Mla_Mls_Se_D_() // Base encodings named by the ValueSource of F_Mla_Mls_Se_D, which ORs the lane-index bit into them.
+ {
+ return new uint[] // a fresh two-element array on every call
+ {
+ 0x5FC21020u, // FMLA D0, D1, V2.D[0]
+ 0x5FC25020u // FMLS D0, D1, V2.D[0]
+ };
+ }
+
+ private static uint[] _F_Mla_Mls_Ve_2S_4S_() // Base encodings named by the ValueSource of F_Mla_Mls_Ve_2S_4S, which ORs register, lane-index and Q bits into them.
+ {
+ return new uint[] // a fresh two-element array on every call
+ {
+ 0x0F801000u, // FMLA V0.2S, V0.2S, V0.S[0]
+ 0x0F805000u // FMLS V0.2S, V0.2S, V0.S[0]
+ };
+ }
+
+ private static uint[] _F_Mla_Mls_Ve_2D_() // Base encodings named by the ValueSource of F_Mla_Mls_Ve_2D, which ORs register and lane-index bits into them.
+ {
+ return new uint[] // a fresh two-element array on every call
+ {
+ 0x4FC01000u, // FMLA V0.2D, V0.2D, V0.D[0]
+ 0x4FC05000u // FMLS V0.2D, V0.2D, V0.D[0]
+ };
+ }
+
+ private static uint[] _F_Mul_Mulx_Se_S_() // Base encodings named by the ValueSource of F_Mul_Mulx_Se_S, which ORs the lane-index bits into them.
+ {
+ return new uint[] // a fresh two-element array on every call
+ {
+ 0x5F829020u, // FMUL S0, S1, V2.S[0]
+ 0x7F829020u // FMULX S0, S1, V2.S[0]
+ };
+ }
+
+ private static uint[] _F_Mul_Mulx_Se_D_() // Base encodings named by the ValueSource of F_Mul_Mulx_Se_D, which ORs the lane-index bit into them.
+ {
+ return new uint[] // a fresh two-element array on every call
+ {
+ 0x5FC29020u, // FMUL D0, D1, V2.D[0]
+ 0x7FC29020u // FMULX D0, D1, V2.D[0]
+ };
+ }
+
+ private static uint[] _F_Mul_Mulx_Ve_2S_4S_() // Base encodings named by the ValueSource of F_Mul_Mulx_Ve_2S_4S, which ORs register, lane-index and Q bits into them.
+ {
+ return new uint[] // a fresh two-element array on every call
+ {
+ 0x0F809000u, // FMUL V0.2S, V0.2S, V0.S[0]
+ 0x2F809000u // FMULX V0.2S, V0.2S, V0.S[0]
+ };
+ }
+
+ private static uint[] _F_Mul_Mulx_Ve_2D_() // Base encodings named by the ValueSource of F_Mul_Mulx_Ve_2D, which ORs register and lane-index bits into them.
+ {
+ return new uint[] // a fresh two-element array on every call
+ {
+ 0x4FC09000u, // FMUL V0.2D, V0.2D, V0.D[0]
+ 0x6FC09000u // FMULX V0.2D, V0.2D, V0.D[0]
+ };
+ }
+#endregion
+
+ private const int RndCnt = 2; // number of random rounds each value source in this class appends after its fixed patterns
+
+ private static readonly bool NoZeros = false; // when true, the value sources leave out the -Zero/+Zero patterns
+ private static readonly bool NoInfs = false; // when true, the value sources leave out the -Infinity/+Infinity patterns
+ private static readonly bool NoNaNs = false; // when true, the value sources leave out the QNaN/SNaN patterns
+
+ [Test, Pairwise] [Explicit] // Fused. Scalar single-precision FMLA/FMLS by element, exercised over lane indices 0-3.
+ public void F_Mla_Mls_Se_S([ValueSource("_F_Mla_Mls_Se_S_")] uint Opcodes, // base encoding: FMLA or FMLS S0, S1, V2.S[0]
+ [ValueSource("_1S_F_")] ulong Z, // initial contents for V0
+ [ValueSource("_1S_F_")] ulong A, // contents for V1
+ [ValueSource("_2S_F_")] ulong B, // contents for V2 (same pattern in both 32-bit halves)
+ [Values(0u, 1u, 2u, 3u)] uint Index) // lane index encoded into the opcode
+ {
+ uint H = (Index >> 1) & 1; // upper bit of the lane index
+ uint L = Index & 1; // lower bit of the lane index
+
+ Opcodes |= (L << 21) | (H << 11); // L is placed at bit 21, H at bit 11
+
+ Vector128<float> V0 = MakeVectorE0E1(Z, Z); // NOTE(review): presumably fills both 64-bit elements with Z - confirm against MakeVectorE0E1
+ Vector128<float> V1 = MakeVectorE0(A); // NOTE(review): presumably places A in element 0 only - confirm against MakeVectorE0
+ Vector128<float> V2 = MakeVectorE0E1(B, B * H); // second argument is B when H is 1 and zero when H is 0
+
+ int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); // keeps only the bit at position FPCR.DN of a random word, so that bit is randomly set or clear
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); // the returned state is not read again in this method
+
+ CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); // NOTE(review): presumably compares with a Unicorn reference run under the given FPSR mask, skip rule and tolerance - helper not visible here
+ }
+
+ [Test, Pairwise] [Explicit] // Fused. Scalar double-precision FMLA/FMLS by element, exercised over lane indices 0-1.
+ public void F_Mla_Mls_Se_D([ValueSource("_F_Mla_Mls_Se_D_")] uint Opcodes, // base encoding: FMLA or FMLS D0, D1, V2.D[0]
+ [ValueSource("_1D_F_")] ulong Z, // initial contents for V0
+ [ValueSource("_1D_F_")] ulong A, // contents for V1
+ [ValueSource("_1D_F_")] ulong B, // contents for V2
+ [Values(0u, 1u)] uint Index) // lane index encoded into the opcode
+ {
+ uint H = Index & 1; // the single lane-index bit
+
+ Opcodes |= H << 11; // H is placed at bit 11
+
+ Vector128<float> V0 = MakeVectorE0E1(Z, Z); // NOTE(review): presumably fills both 64-bit elements with Z - confirm against MakeVectorE0E1
+ Vector128<float> V1 = MakeVectorE0(A); // NOTE(review): presumably places A in element 0 only - confirm against MakeVectorE0
+ Vector128<float> V2 = MakeVectorE0E1(B, B * H); // second argument is B when H is 1 and zero when H is 0
+
+ int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); // keeps only the bit at position FPCR.DN of a random word, so that bit is randomly set or clear
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); // the returned state is not read again in this method
+
+ CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); // NOTE(review): presumably compares with a Unicorn reference run under the given FPSR mask, skip rule and tolerance - helper not visible here
+ }
+
+ [Test, Pairwise] [Explicit] // Fused. Vector single-precision FMLA/FMLS by element, for both Q settings and lane indices 0-3.
+ public void F_Mla_Mls_Ve_2S_4S([ValueSource("_F_Mla_Mls_Ve_2S_4S_")] uint Opcodes, // base encoding: FMLA or FMLS V0.2S, V0.2S, V0.S[0]
+ [Values(0u)] uint Rd, // destination register number, always 0
+ [Values(1u, 0u)] uint Rn, // first source register number
+ [Values(2u, 0u)] uint Rm, // element source register number
+ [ValueSource("_2S_F_")] ulong Z, // initial contents for V0
+ [ValueSource("_2S_F_")] ulong A, // contents for V1
+ [ValueSource("_2S_F_")] ulong B, // contents for V2
+ [Values(0u, 1u, 2u, 3u)] uint Index, // lane index encoded into the opcode
+ [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
+ {
+ uint H = (Index >> 1) & 1; // upper bit of the lane index
+ uint L = Index & 1; // lower bit of the lane index
+
+ Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm goes to bits 16-20, Rn to bits 5-9, Rd to bits 0-4
+ Opcodes |= (L << 21) | (H << 11); // L is placed at bit 21, H at bit 11
+ Opcodes |= ((Q & 1) << 30); // Q is placed at bit 30
+
+ Vector128<float> V0 = MakeVectorE0E1(Z, Z); // NOTE(review): presumably fills both 64-bit elements with Z - confirm against MakeVectorE0E1
+ Vector128<float> V1 = MakeVectorE0E1(A, A * Q); // second argument is A when Q is 1 and zero when Q is 0
+ Vector128<float> V2 = MakeVectorE0E1(B, B * H); // second argument is B when H is 1 and zero when H is 0
+
+ int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); // keeps only the bit at position FPCR.DN of a random word, so that bit is randomly set or clear
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); // the returned state is not read again in this method
+
+ CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); // NOTE(review): presumably compares with a Unicorn reference run under the given FPSR mask, skip rule and tolerance - helper not visible here
+ }
+
+ [Test, Pairwise] [Explicit] // Fused. Vector double-precision FMLA/FMLS by element, exercised over lane indices 0-1.
+ public void F_Mla_Mls_Ve_2D([ValueSource("_F_Mla_Mls_Ve_2D_")] uint Opcodes, // base encoding: FMLA or FMLS V0.2D, V0.2D, V0.D[0]
+ [Values(0u)] uint Rd, // destination register number, always 0
+ [Values(1u, 0u)] uint Rn, // first source register number
+ [Values(2u, 0u)] uint Rm, // element source register number
+ [ValueSource("_1D_F_")] ulong Z, // initial contents for V0
+ [ValueSource("_1D_F_")] ulong A, // contents for V1
+ [ValueSource("_1D_F_")] ulong B, // contents for V2
+ [Values(0u, 1u)] uint Index) // lane index encoded into the opcode
+ {
+ uint H = Index & 1; // the single lane-index bit
+
+ Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm goes to bits 16-20, Rn to bits 5-9, Rd to bits 0-4
+ Opcodes |= H << 11; // H is placed at bit 11
+
+ Vector128<float> V0 = MakeVectorE0E1(Z, Z); // NOTE(review): presumably fills both 64-bit elements with Z - confirm against MakeVectorE0E1
+ Vector128<float> V1 = MakeVectorE0E1(A, A); // same helper, both arguments are A
+ Vector128<float> V2 = MakeVectorE0E1(B, B * H); // second argument is B when H is 1 and zero when H is 0
+
+ int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); // keeps only the bit at position FPCR.DN of a random word, so that bit is randomly set or clear
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); // the returned state is not read again in this method
+
+ CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); // NOTE(review): presumably compares with a Unicorn reference run under the given FPSR mask, skip rule and tolerance - helper not visible here
+ }
+
+ [Test, Pairwise] [Explicit] // Scalar single-precision FMUL/FMULX by element, exercised over lane indices 0-3.
+ public void F_Mul_Mulx_Se_S([ValueSource("_F_Mul_Mulx_Se_S_")] uint Opcodes, // base encoding: FMUL or FMULX S0, S1, V2.S[0]
+ [ValueSource("_1S_F_")] ulong A, // contents for V1
+ [ValueSource("_2S_F_")] ulong B, // contents for V2 (same pattern in both 32-bit halves)
+ [Values(0u, 1u, 2u, 3u)] uint Index) // lane index encoded into the opcode
+ {
+ uint H = (Index >> 1) & 1; // upper bit of the lane index
+ uint L = Index & 1; // lower bit of the lane index
+
+ Opcodes |= (L << 21) | (H << 11); // L is placed at bit 21, H at bit 11
+
+ ulong Z = TestContext.CurrentContext.Random.NextULong(); // random 64-bit filler for the destination register V0
+ Vector128<float> V0 = MakeVectorE0E1(Z, Z); // NOTE(review): presumably fills both 64-bit elements with Z - confirm against MakeVectorE0E1
+ Vector128<float> V1 = MakeVectorE0(A); // NOTE(review): presumably places A in element 0 only - confirm against MakeVectorE0
+ Vector128<float> V2 = MakeVectorE0E1(B, B * H); // second argument is B when H is 1 and zero when H is 0
+
+ int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); // keeps only the bit at position FPCR.DN of a random word, so that bit is randomly set or clear
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); // the returned state is not read again in this method
+
+ CompareAgainstUnicorn(FpsrMask: FPSR.IOC); // NOTE(review): presumably compares with a Unicorn reference run, checking FPSR under the IOC mask - helper not visible here
+ }
+
+ [Test, Pairwise] [Explicit] // Scalar double-precision FMUL/FMULX by element, exercised over lane indices 0-1.
+ public void F_Mul_Mulx_Se_D([ValueSource("_F_Mul_Mulx_Se_D_")] uint Opcodes, // base encoding: FMUL or FMULX D0, D1, V2.D[0]
+ [ValueSource("_1D_F_")] ulong A, // contents for V1
+ [ValueSource("_1D_F_")] ulong B, // contents for V2
+ [Values(0u, 1u)] uint Index) // lane index encoded into the opcode
+ {
+ uint H = Index & 1; // the single lane-index bit
+
+ Opcodes |= H << 11; // H is placed at bit 11
+
+ ulong Z = TestContext.CurrentContext.Random.NextULong(); // random 64-bit filler for the destination register V0
+ Vector128<float> V0 = MakeVectorE1(Z); // NOTE(review): presumably places Z in element 1 only (unlike the _S variant, which passes Z for both elements) - confirm against MakeVectorE1
+ Vector128<float> V1 = MakeVectorE0(A); // NOTE(review): presumably places A in element 0 only - confirm against MakeVectorE0
+ Vector128<float> V2 = MakeVectorE0E1(B, B * H); // second argument is B when H is 1 and zero when H is 0
+
+ int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); // keeps only the bit at position FPCR.DN of a random word, so that bit is randomly set or clear
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); // the returned state is not read again in this method
+
+ CompareAgainstUnicorn(FpsrMask: FPSR.IOC); // NOTE(review): presumably compares with a Unicorn reference run, checking FPSR under the IOC mask - helper not visible here
+ }
+
+ [Test, Pairwise] [Explicit] // Vector single-precision FMUL/FMULX by element, for both Q settings and lane indices 0-3.
+ public void F_Mul_Mulx_Ve_2S_4S([ValueSource("_F_Mul_Mulx_Ve_2S_4S_")] uint Opcodes, // base encoding: FMUL or FMULX V0.2S, V0.2S, V0.S[0]
+ [Values(0u)] uint Rd, // destination register number, always 0
+ [Values(1u, 0u)] uint Rn, // first source register number
+ [Values(2u, 0u)] uint Rm, // element source register number
+ [ValueSource("_2S_F_")] ulong Z, // initial contents for V0
+ [ValueSource("_2S_F_")] ulong A, // contents for V1
+ [ValueSource("_2S_F_")] ulong B, // contents for V2
+ [Values(0u, 1u, 2u, 3u)] uint Index, // lane index encoded into the opcode
+ [Values(0b0u, 0b1u)] uint Q) // <2S, 4S>
+ {
+ uint H = (Index >> 1) & 1; // upper bit of the lane index
+ uint L = Index & 1; // lower bit of the lane index
+
+ Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm goes to bits 16-20, Rn to bits 5-9, Rd to bits 0-4
+ Opcodes |= (L << 21) | (H << 11); // L is placed at bit 21, H at bit 11
+ Opcodes |= ((Q & 1) << 30); // Q is placed at bit 30
+
+ Vector128<float> V0 = MakeVectorE0E1(Z, Z); // NOTE(review): presumably fills both 64-bit elements with Z - confirm against MakeVectorE0E1
+ Vector128<float> V1 = MakeVectorE0E1(A, A * Q); // second argument is A when Q is 1 and zero when Q is 0
+ Vector128<float> V2 = MakeVectorE0E1(B, B * H); // second argument is B when H is 1 and zero when H is 0
+
+ int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); // keeps only the bit at position FPCR.DN of a random word, so that bit is randomly set or clear
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); // the returned state is not read again in this method
+
+ CompareAgainstUnicorn(FpsrMask: FPSR.IOC); // NOTE(review): presumably compares with a Unicorn reference run, checking FPSR under the IOC mask - helper not visible here
+ }
+
+ [Test, Pairwise] [Explicit] // Vector double-precision FMUL/FMULX by element, exercised over lane indices 0-1.
+ public void F_Mul_Mulx_Ve_2D([ValueSource("_F_Mul_Mulx_Ve_2D_")] uint Opcodes, // base encoding: FMUL or FMULX V0.2D, V0.2D, V0.D[0]
+ [Values(0u)] uint Rd, // destination register number, always 0
+ [Values(1u, 0u)] uint Rn, // first source register number
+ [Values(2u, 0u)] uint Rm, // element source register number
+ [ValueSource("_1D_F_")] ulong Z, // initial contents for V0
+ [ValueSource("_1D_F_")] ulong A, // contents for V1
+ [ValueSource("_1D_F_")] ulong B, // contents for V2
+ [Values(0u, 1u)] uint Index) // lane index encoded into the opcode
+ {
+ uint H = Index & 1; // the single lane-index bit
+
+ Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); // Rm goes to bits 16-20, Rn to bits 5-9, Rd to bits 0-4
+ Opcodes |= H << 11; // H is placed at bit 11
+
+ Vector128<float> V0 = MakeVectorE0E1(Z, Z); // NOTE(review): presumably fills both 64-bit elements with Z - confirm against MakeVectorE0E1
+ Vector128<float> V1 = MakeVectorE0E1(A, A); // same helper, both arguments are A
+ Vector128<float> V2 = MakeVectorE0E1(B, B * H); // second argument is B when H is 1 and zero when H is 0
+
+ int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); // keeps only the bit at position FPCR.DN of a random word, so that bit is randomly set or clear
+
+ AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); // the returned state is not read again in this method
+
+ CompareAgainstUnicorn(FpsrMask: FPSR.IOC); // NOTE(review): presumably compares with a Unicorn reference run, checking FPSR under the IOC mask - helper not visible here
+ }
+#endif
+ }
+}