diff options
| author | Merry <MerryMage@users.noreply.github.com> | 2018-07-08 20:54:47 +0100 |
|---|---|---|
| committer | gdkchan <gab.dark.100@gmail.com> | 2018-07-08 16:54:47 -0300 |
| commit | 0f8f40486d1b3215c845325744bd545149223805 (patch) | |
| tree | e843edc51415e9f4402940fa579bd967a26dd266 /ChocolArm64/Instruction | |
| parent | 6479c3e48479259bca79bee6f1016e8108cc33a8 (diff) | |
ChocolArm64: More accurate implementation of Frecpe & Frecps (#228)
* ChocolArm64: More accurate implementation of Frecpe
* ChocolArm64: Handle infinities and zeros in Frecps
Diffstat (limited to 'ChocolArm64/Instruction')
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs | 100 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/AInstEmitSimdHelper.cs | 20 | ||||
| -rw-r--r-- | ChocolArm64/Instruction/ASoftFloat.cs | 120 |
3 files changed, 154 insertions, 86 deletions
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index b96b71be..39331f96 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -641,106 +641,34 @@ namespace ChocolArm64.Instruction public static void Frecpe_S(AILEmitterCtx Context) { - EmitFrecpe(Context, 0, Scalar: true); - } - - public static void Frecpe_V(AILEmitterCtx Context) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) - { - EmitFrecpe(Context, Index, Scalar: false); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) + EmitScalarUnaryOpF(Context, () => { - EmitVectorZeroUpper(Context, Op.Rd); - } + EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)); + }); } - private static void EmitFrecpe(AILEmitterCtx Context, int Index, bool Scalar) + public static void Frecpe_V(AILEmitterCtx Context) { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(1); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(1); - } - - EmitVectorExtractF(Context, Op.Rn, Index, SizeF); - - Context.Emit(OpCodes.Div); - - if (Scalar) + EmitVectorUnaryOpF(Context, () => { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + EmitUnarySoftFloatCall(Context, nameof(ASoftFloat.RecipEstimate)); + }); } public static void Frecps_S(AILEmitterCtx Context) { - EmitFrecps(Context, 0, Scalar: true); - } - - public static void Frecps_V(AILEmitterCtx Context) - { - AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; - - int SizeF = Op.Size & 1; - - int Bytes = Context.CurrOp.GetBitsCount() >> 3; - - for (int Index = 0; Index < Bytes >> SizeF + 2; Index++) - { - EmitFrecps(Context, Index, Scalar: false); - } - - if (Op.RegisterSize == ARegisterSize.SIMD64) + EmitScalarBinaryOpF(Context, () => { - EmitVectorZeroUpper(Context, Op.Rd); - } + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + }); } - private static void EmitFrecps(AILEmitterCtx Context, int Index, bool Scalar) + public static void Frecps_V(AILEmitterCtx Context) { - AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - - int SizeF = Op.Size & 1; - - if (SizeF == 0) - { - Context.EmitLdc_R4(2); - } - else /* if (SizeF == 1) */ - { - Context.EmitLdc_R8(2); - } - - EmitVectorExtractF(Context, Op.Rn, Index, SizeF); - EmitVectorExtractF(Context, Op.Rm, Index, SizeF); - - Context.Emit(OpCodes.Mul); - Context.Emit(OpCodes.Sub); - - if (Scalar) + EmitVectorBinaryOpF(Context, () => { - EmitVectorZeroAll(Context, Op.Rd); - } - - EmitVectorInsertF(Context, Op.Rd, Index, SizeF); + EmitBinarySoftFloatCall(Context, nameof(ASoftFloat.RecipStep)); + }); } public static void Frinta_S(AILEmitterCtx Context) diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 0f6ea42c..d895ec9c 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -253,6 +253,26 @@ namespace ChocolArm64.Instruction Context.EmitCall(MthdInfo); } + public static void EmitBinarySoftFloatCall(AILEmitterCtx Context, string Name) + { + IAOpCodeSimd Op = (IAOpCodeSimd)Context.CurrOp; + + int SizeF = Op.Size & 1; + + MethodInfo MthdInfo; + + if (SizeF == 0) + { + MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(float), typeof(float) }); + } + else /* if (SizeF == 1) */ + { + MthdInfo = typeof(ASoftFloat).GetMethod(Name, new Type[] { typeof(double), typeof(double) }); + } + + Context.EmitCall(MthdInfo); + } + public static void EmitScalarBinaryOpByElemF(AILEmitterCtx Context, Action Emit) { AOpCodeSimdRegElemF Op = (AOpCodeSimdRegElemF)Context.CurrOp; diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index 1bd71665..e63c82be 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -7,8 +7,10 @@ namespace ChocolArm64.Instruction static ASoftFloat() { InvSqrtEstimateTable = BuildInvSqrtEstimateTable(); + RecipEstimateTable = BuildRecipEstimateTable(); } + private static readonly byte[] RecipEstimateTable; private static readonly byte[] InvSqrtEstimateTable; private static byte[] BuildInvSqrtEstimateTable() @@ -38,6 +40,22 @@ namespace ChocolArm64.Instruction return Table; } + private static byte[] BuildRecipEstimateTable() + { + byte[] Table = new byte[256]; + for (ulong index = 0; index < 256; index++) + { + ulong a = index | 0x100; + + a = (a << 1) + 1; + ulong b = 0x80000 / a; + b = (b + 1) >> 1; + + Table[index] = (byte)(b & 0xFF); + } + return Table; + } + public static float InvSqrtEstimate(float x) { return (float)InvSqrtEstimate((double)x); @@ -105,5 +123,107 @@ namespace ChocolArm64.Instruction ulong result = x_sign | (result_exp << 52) | fraction; return BitConverter.Int64BitsToDouble((long)result); } + + public static float RecipEstimate(float x) + { + return (float)RecipEstimate((double)x); + } + + public static double RecipEstimate(double x) + { + ulong x_bits = (ulong)BitConverter.DoubleToInt64Bits(x); + ulong x_sign = x_bits & 0x8000000000000000; + ulong x_exp = (x_bits >> 52) & 0x7FF; + ulong scaled = x_bits & ((1ul << 52) - 1); + + if (x_exp >= 2045) + { + if (x_exp == 0x7ff && scaled != 0) + { + // NaN + return BitConverter.Int64BitsToDouble((long)(x_bits | 0x0008000000000000)); + } + + // Infinity, or Out of range -> Zero + return BitConverter.Int64BitsToDouble((long)x_sign); + } + + if (x_exp == 0) + { + if (scaled == 0) + { + // Zero -> Infinity + return BitConverter.Int64BitsToDouble((long)(x_sign | 0x7ff0000000000000)); + } + + // Denormal + if ((scaled & (1ul << 51)) == 0) + { + x_exp = ~0ul; + scaled <<= 2; + } + else + { + scaled <<= 1; + } + } + + scaled >>= 44; + scaled &= 0xFF; + + ulong result_exp = (2045 - x_exp) & 0x7FF; + ulong estimate = (ulong)RecipEstimateTable[scaled]; + ulong fraction = estimate << 44; + + if (result_exp == 0) + { + fraction >>= 1; + fraction |= 1ul << 51; + } + else if (result_exp == 0x7FF) + { + result_exp = 0; + fraction >>= 2; + fraction |= 1ul << 50; + } + + ulong result = x_sign | (result_exp << 52) | fraction; + return BitConverter.Int64BitsToDouble((long)result); + } + + public static float RecipStep(float op1, float op2) + { + return (float)RecipStep((double)op1, (double)op2); + } + + public static double RecipStep(double op1, double op2) + { + op1 = -op1; + + ulong op1_bits = (ulong)BitConverter.DoubleToInt64Bits(op1); + ulong op2_bits = (ulong)BitConverter.DoubleToInt64Bits(op2); + + ulong op1_sign = op1_bits & 0x8000000000000000; + ulong op2_sign = op2_bits & 0x8000000000000000; + ulong op1_other = op1_bits & 0x7FFFFFFFFFFFFFFF; + ulong op2_other = op2_bits & 0x7FFFFFFFFFFFFFFF; + + bool inf1 = op1_other == 0x7ff0000000000000; + bool inf2 = op2_other == 0x7ff0000000000000; + bool zero1 = op1_other == 0; + bool zero2 = op2_other == 0; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + return 2.0; + } + else if (inf1 || inf2) + { + // Infinity + return BitConverter.Int64BitsToDouble((long)(0x7ff0000000000000 | (op1_sign ^ op2_sign))); + } + + return 2.0 + op1 * op2; + } } }
\ No newline at end of file |
