about summary refs log tree commit diff
diff options
context:
space:
mode:
author Merry <MerryMage@users.noreply.github.com> 2018-07-08 20:54:47 +0100
committer gdkchan <gab.dark.100@gmail.com> 2018-07-08 16:54:47 -0300
commit 0f8f40486d1b3215c845325744bd545149223805 (patch)
tree e843edc51415e9f4402940fa579bd967a26dd266
parent 6479c3e48479259bca79bee6f1016e8108cc33a8 (diff)
ChocolArm64: More accurate implementation of Frecpe & Frecps (#228)
* ChocolArm64: More accurate implementation of Frecpe
* ChocolArm64: Handle infinities and zeros in Frecps
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs | 100
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdHelper.cs | 20
-rw-r--r-- ChocolArm64/Instruction/ASoftFloat.cs | 120
-rw-r--r-- Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 39
4 files changed, 170 insertions, 109 deletions
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index b96b71be..39331f96 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -641,106 +641,34 @@ namespace ChocolArm64.Instruction
public static void Frecpe_S(AILEmitterCtx Context)
{
- EmitFrecpe(Context, 0, Scalar: true);
- }
-
- public static void Frecpe_V(AILEmitterCtx Context)
- {
- AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
-
- int SizeF = Op.Size & 1;
-
- int Bytes = Context.CurrOp.GetBitsCount() >> 3;
-
- for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
- {
- EmitFrecpe(Context, Index, Scalar: false);
- }
-
- if (Op.RegisterSize == ARegisterSize.SIMD64)
+ EmitScalarUnaryOpF(Context, () =>
{
- EmitVectorZeroUpper(Context, Op.Rd);
- }
+ EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate));
+ });
}
- private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar)
+ public static void Frecpe_V(AILEmitterCtx Context)
{
- AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
-
- int SizeF = Op.Size & 1;
-
- if (SizeF == 0)
- {
- Context.EmitLdc_R4(1);
- }
- else /* if (SizeF == 1) */
- {
- Context.EmitLdc_R8(1);
- }
-
- EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
-
- Context.Emit(OpCodes.Div);
-
- if (Scalar)
+ EmitVectorUnaryOpF(Context, () =>
{
- EmitVectorZeroAll(Context, Op.Rd);
- }
-
- EmitVectorInsertF(Context, Op.Rd, Index, SizeF);
+ EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate));
+ });
}
public static void Frecps_S(AILEmitterCtx Context)
{
- EmitFrecps(Context, 0, Scalar: true);
- }
-
- public static void Frecps_V(AILEmitterCtx Context)
- {
- AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
-
- int SizeF = Op.Size & 1;
-
- int Bytes = Context.CurrOp.GetBitsCount() >> 3;
-
- for (int Index = 0; Index < Bytes >> SizeF + 2; Index++)
- {
- EmitFrecps(Context, Index, Scalar: false);
- }
-
- if (Op.RegisterSize == ARegisterSize.SIMD64)
+ EmitScalarBinaryOpF(Context, () =>
{
- EmitVectorZeroUpper(Context, Op.Rd);
- }
+ EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep));
+ });
}
- private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar)
+ public static void Frecps_V(AILEmitterCtx Context)
{
- AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
-
- int SizeF = Op.Size & 1;
-
- if (SizeF == 0)
- {
- Context.EmitLdc_R4(2);
- }
- else /* if (SizeF == 1) */
- {
- Context.EmitLdc_R8(2);
- }
-
- EmitVectorExtractF(Context, Op.Rn, Index, SizeF);
- EmitVectorExtractF(Context, Op.Rm, Index, SizeF);
-
- Context.Emit(OpCodes.Mul);
- Context.Emit(OpCodes.Sub);
-
- if (Scalar)
+ EmitVectorBinaryOpF(Context, () =>
{
- EmitVectorZeroAll(Context, Op.Rd);
- }
-
- EmitVectorInsertF(Context, Op.Rd, Index, SizeF);
+ EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep));
+ });
}
public static void Frinta_S(AILEmitterCtx Context)
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index 0f6ea42c..d895ec9c 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -253,6 +253,26 @@ namespace ChocolArm64.Instruction
Context.EmitCall(MthdInfo);
}
+ public static void EmitBinarySoftFloatCall(AILEmitterCtx Context, string Name)
+ {
+ IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp;
+
+ int SizeF = Op.Size & 1;
+
+ MethodInfo MthdInfo;
+
+ if (SizeF == 0)
+ {
+ MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(float), typeof(float) });
+ }
+ else /* if (SizeF == 1) */
+ {
+ MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(double), typeof(double) });
+ }
+
+ Context.EmitCall(MthdInfo);
+ }
+
public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit)
{
AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp;
diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs
index 1bd71665..e63c82be 100644
--- a/ChocolArm64/Instruction/ASoftFloat.cs
+++ b/ChocolArm64/Instruction/ASoftFloat.cs
@@ -7,8 +7,10 @@ namespace ChocolArm64.Instruction
static ASoftFloat()
{
InvSqrtEstimateTable = BuildInvSqrtEstimateTable();
+ RecipEstimateTable = BuildRecipEstimateTable();
}
+ private static readonly byte[] RecipEstimateTable;
private static readonly byte[] InvSqrtEstimateTable;
private static byte[] BuildInvSqrtEstimateTable()
@@ -38,6 +40,22 @@ namespace ChocolArm64.Instruction
return Table;
}
+ private static byte[] BuildRecipEstimateTable()
+ {
+ byte[] Table = new byte[256];
+ for (ulong index = 0; index < 256; index++)
+ {
+ ulong a = index | 0x100;
+
+ a = (a << 1) + 1;
+ ulong b = 0x80000 / a;
+ b = (b + 1) >> 1;
+
+ Table[index] = (byte)(b & 0xFF);
+ }
+ return Table;
+ }
+
public static float InvSqrtEstimate(float x)
{
return (float)InvSqrtEstimate((double)x);
@@ -105,5 +123,107 @@ namespace ChocolArm64.Instruction
ulong result = x_sign | (result_exp << 52) | fraction;
return BitConverter.Int64BitsToDouble((long)result);
}
+
+ public static float RecipEstimate(float x)
+ {
+ return (float)RecipEstimate((double)x);
+ }
+
+ public static double RecipEstimate(double x)
+ {
+ ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x);
+ ulong x_sign = x_bits & 0x8000000000000000;
+ ulong x_exp = (x_bits >> 52) & 0x7FF;
+ ulong scaled = x_bits & ((1ul << 52) - 1);
+
+ if (x_exp >= 2045)
+ {
+ if (x_exp == 0x7ff && scaled != 0)
+ {
+ // NaN
+ return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000));
+ }
+
+ // Infinity, or Out of range -> Zero
+ return BitConverter.Int64BitsToDouble((long)x_sign);
+ }
+
+ if (x_exp == 0)
+ {
+ if (scaled == 0)
+ {
+ // Zero -> Infinity
+ return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000));
+ }
+
+ // Denormal
+ if ((scaled & (1ul << 51)) == 0)
+ {
+ x_exp = ~0ul;
+ scaled <<= 2;
+ }
+ else
+ {
+ scaled <<= 1;
+ }
+ }
+
+ scaled >>= 44;
+ scaled &= 0xFF;
+
+ ulong result_exp = (2045 - x_exp) & 0x7FF;
+ ulong estimate = (ulong)RecipEstimateTable[scaled];
+ ulong fraction = estimate << 44;
+
+ if (result_exp == 0)
+ {
+ fraction >>= 1;
+ fraction |= 1ul << 51;
+ }
+ else if (result_exp == 0x7FF)
+ {
+ result_exp = 0;
+ fraction >>= 2;
+ fraction |= 1ul << 50;
+ }
+
+ ulong result = x_sign | (result_exp << 52) | fraction;
+ return BitConverter.Int64BitsToDouble((long)result);
+ }
+
+ public static float RecipStep(float op1, float op2)
+ {
+ return (float)RecipStep((double)op1, (double)op2);
+ }
+
+ public static double RecipStep(double op1, double op2)
+ {
+ op1 = -op1;
+
+ ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1);
+ ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2);
+
+ ulong op1_sign = op1_bits & 0x8000000000000000;
+ ulong op2_sign = op2_bits & 0x8000000000000000;
+ ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF;
+ ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF;
+
+ bool inf1 = op1_other == 0x7ff0000000000000;
+ bool inf2 = op2_other == 0x7ff0000000000000;
+ bool zero1 = op1_other == 0;
+ bool zero2 = op2_other == 0;
+
+ if ((inf1 && zero2) || (zero1 && inf2))
+ {
+ return 2.0;
+ }
+ else if (inf1 || inf2)
+ {
+ // Infinity
+ return BitConverter.Int64BitsToDouble((long)(0x7ff0000000000000 | (op1_sign ^ op2_sign)));
+ }
+
+ return 2.0 + op1 * op2;
+ }
}
}
\ No newline at end of file
diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
index 98be2fc5..2a0f5ed9 100644
--- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
+++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs
@@ -163,26 +163,18 @@ namespace Ryujinx.Tests.Cpu
Assert.That(Sse41.Extract(ThreadState.V6, (byte)0), Is.EqualTo(A * B));
}
- [Test, Description("FRECPE D0, D1")]
- public void Frecpe_S([Random(100)] double A)
+ [TestCase(0x00000000u, 0x7F800000u)]
+ [TestCase(0x80000000u, 0xFF800000u)]
+ [TestCase(0x00FFF000u, 0x7E000000u)]
+ [TestCase(0x41200000u, 0x3DCC8000u)]
+ [TestCase(0xC1200000u, 0xBDCC8000u)]
+ [TestCase(0x001FFFFFu, 0x7F800000u)]
+ [TestCase(0x007FF000u, 0x7E800000u)]
+ public void Frecpe_S(uint A, uint Result)
{
- AThreadState ThreadState = SingleOpcode(0x5EE1D820, V1: MakeVectorE0(A));
-
- Assert.That(VectorExtractDouble(ThreadState.V0, 0), Is.EqualTo(1 / A));
- }
-
- [Test, Description("FRECPE V2.4S, V0.4S")]
- public void Frecpe_V([Random(100)] float A)
- {
- AThreadState ThreadState = SingleOpcode(0x4EA1D802, V0: Sse.SetAllVector128(A));
-
- Assert.Multiple(() =>
- {
- Assert.That(Sse41.Extract(ThreadState.V2, (byte)0), Is.EqualTo(1 / A));
- Assert.That(Sse41.Extract(ThreadState.V2, (byte)1), Is.EqualTo(1 / A));
- Assert.That(Sse41.Extract(ThreadState.V2, (byte)2), Is.EqualTo(1 / A));
- Assert.That(Sse41.Extract(ThreadState.V2, (byte)3), Is.EqualTo(1 / A));
- });
+ Vector128<float> V1 = MakeVectorE0(A);
+ AThreadState ThreadState = SingleOpcode(0x5EA1D820, V1: V1);
+ Assert.AreEqual(Result, GetVectorE0(ThreadState.V0));
}
[Test, Description("FRECPS D0, D1, D2")]
@@ -202,12 +194,13 @@ namespace Ryujinx.Tests.Cpu
V2: Sse.SetAllVector128(A),
V0: Sse.SetAllVector128(B));
+ float Result = (float)(2 - ((double)A * (double)B));
Assert.Multiple(() =>
{
- Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(2 - (A * B)));
- Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(2 - (A * B)));
- Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(2 - (A * B)));
- Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(2 - (A * B)));
+ Assert.That(Sse41.Extract(ThreadState.V4, (byte)0), Is.EqualTo(Result));
+ Assert.That(Sse41.Extract(ThreadState.V4, (byte)1), Is.EqualTo(Result));
+ Assert.That(Sse41.Extract(ThreadState.V4, (byte)2), Is.EqualTo(Result));
+ Assert.That(Sse41.Extract(ThreadState.V4, (byte)3), Is.EqualTo(Result));
});
}