aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure
diff options
context:
space:
mode:
authorLDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>2020-08-13 07:34:02 +0200
committerGitHub <noreply@github.com>2020-08-13 02:34:02 -0300
commit6938988427e7f96adcd8fe76fe5d0a19b014b2b2 (patch)
tree39b049344e5d00b5f152b5354e7c8090ebf46c41 /ARMeilleure
parent1ad9045c6b00a5c729c8c7d697f3da54ed177883 (diff)
Fix Vcvt_FI & Vcvt_RM; Add Vfma_S & Vfms_S. Add Tests. (#1471)
* Fix Vcvt_FI & Vcvt_RM; Add Vfma_S & Vfms_S. Add Tests. * Address PR feedback & Nit.
Diffstat (limited to 'ARMeilleure')
-rw-r--r--ARMeilleure/Decoders/OpCode32SimdCvtFI.cs14
-rw-r--r--ARMeilleure/Decoders/OpCode32SimdS.cs9
-rw-r--r--ARMeilleure/Decoders/OpCodeTable.cs12
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs38
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdCvt32.cs16
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdHelper32.cs2
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdMove32.cs2
-rw-r--r--ARMeilleure/Instructions/InstName.cs2
-rw-r--r--ARMeilleure/Translation/PTC/Ptc.cs2
9 files changed, 75 insertions, 22 deletions
diff --git a/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
index aaedcb3c..b654a192 100644
--- a/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
+++ b/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs
@@ -2,12 +2,20 @@
{
class OpCode32SimdCvtFI : OpCode32SimdS
{
- public int Opc2 { get; private set; }
-
public OpCode32SimdCvtFI(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{
- Opc2 = (opCode >> 16) & 0x7;
Opc = (opCode >> 7) & 0x1;
+
+ bool toInteger = (Opc2 & 0b100) != 0;
+
+ if (toInteger)
+ {
+ Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e);
+ }
+ else
+ {
+ Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e);
+ }
}
}
}
diff --git a/ARMeilleure/Decoders/OpCode32SimdS.cs b/ARMeilleure/Decoders/OpCode32SimdS.cs
index 2e860d9c..766cf4ba 100644
--- a/ARMeilleure/Decoders/OpCode32SimdS.cs
+++ b/ARMeilleure/Decoders/OpCode32SimdS.cs
@@ -2,14 +2,17 @@
{
class OpCode32SimdS : OpCode32, IOpCode32Simd
{
- public int Vd { get; private set; }
- public int Vm { get; private set; }
- public int Opc { get; protected set; }
+ public int Vd { get; protected set; }
+ public int Vm { get; protected set; }
+ public int Opc { get; protected set; } // "with_zero" (Opc<1>) [Vcmp, Vcmpe].
+ public int Opc2 { get; private set; } // opc2 or RM (opc2<1:0>) [Vcvt, Vrint].
public int Size { get; protected set; }
public OpCode32SimdS(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
{
Opc = (opCode >> 15) & 0x3;
+ Opc2 = (opCode >> 16) & 0x7;
+
Size = (opCode >> 8) & 0x3;
bool single = Size != 3;
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index bbcc15ba..4daccfdb 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -825,15 +825,17 @@ namespace ARMeilleure.Decoders
SetA32("<<<<11101x11010xxxxx101x01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, typeof(OpCode32SimdS));
SetA32("<<<<11101x11010xxxxx101x11x0xxxx", InstName.Vcmpe, InstEmit32.Vcmpe, typeof(OpCode32SimdS));
SetA32("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, typeof(OpCode32SimdS)); // FP 32 and 64, scalar.
- SetA32("<<<<11101x11110xxxxx10xx11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // FP32 to int.
- SetA32("<<<<11101x111000xxxx10xxx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // Int to FP32.
- SetA32("111111101x1111xxxxxx10>>x1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_R, typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp).
+ SetA32("<<<<11101x11110xxxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // FP32 to int.
+ SetA32("<<<<11101x111000xxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, typeof(OpCode32SimdCvtFI)); // Int to FP32.
+ SetA32("111111101x1111xxxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_RM, typeof(OpCode32SimdCvtFI)); // The many FP32 to int encodings (fp).
SetA32("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, typeof(OpCode32SimdCmpZ)); // FP and integer, vector.
SetA32("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, typeof(OpCode32SimdRegS));
SetA32("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, typeof(OpCode32SimdDupGP));
SetA32("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, typeof(OpCode32SimdDupElem));
SetA32("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor, InstEmit32.Veor_I, typeof(OpCode32SimdBinary));
SetA32("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, typeof(OpCode32SimdExt));
+ SetA32("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, typeof(OpCode32SimdRegS));
+ SetA32("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, typeof(OpCode32SimdRegS));
SetA32("111101001x10xxxxxxxxxx00xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemSingle));
SetA32("111101000x10xxxxxxxx0111xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 1.
SetA32("111101000x10xxxxxxxx1010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, typeof(OpCode32SimdMemPair)); // Regs = 2.
@@ -918,8 +920,8 @@ namespace ARMeilleure.Decoders
SetA32("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, typeof(OpCode32SimdSqrte));
SetA32("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, typeof(OpCode32SimdReg));
SetA32("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, typeof(OpCode32SimdRev));
- SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, typeof(OpCode32SimdCvtFI));
- SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, typeof(OpCode32SimdCvtFI));
+ SetA32("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, typeof(OpCode32SimdS));
+ SetA32("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, typeof(OpCode32SimdS));
SetA32("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, typeof(OpCode32SimdShImm));
SetA32("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, typeof(OpCode32SimdSqrte));
SetA32("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, typeof(OpCode32SimdReg));
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index f7f3d47e..57176794 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -231,6 +231,38 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Vfma_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ // TODO: Use FMA instruction set.
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ });
+ }
+ }
+
+ public static void Vfms_S(ArmEmitterContext context) // Fused.
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse2)
+ {
+ // TODO: Use FMA instruction set.
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ }
+ else
+ {
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ });
+ }
+ }
+
public static void Vmov_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
@@ -586,7 +618,8 @@ namespace ARMeilleure.Instructions
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
- return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3);
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res);
});
}
}
@@ -657,7 +690,8 @@ namespace ARMeilleure.Instructions
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
- return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3);
+ Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3);
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res);
});
}
}
diff --git a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
index 00b8ffd6..e4efea70 100644
--- a/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdCvt32.cs
@@ -139,6 +139,7 @@ namespace ARMeilleure.Instructions
}
}
+ // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point).
public static void Vcvt_FI(ArmEmitterContext context)
{
OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
@@ -236,13 +237,14 @@ namespace ARMeilleure.Instructions
return roundMode;
}
- public static void Vcvt_R(ArmEmitterContext context)
+ // VCVTA/M/N/P (floating-point).
+ public static void Vcvt_RM(ArmEmitterContext context)
{
- OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1).
OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
- bool unsigned = (op.Opc & 1) == 0;
+ bool unsigned = op.Opc == 0;
int rm = op.Opc2 & 3;
if (Optimizations.UseSse41 && rm != 0b00)
@@ -277,9 +279,10 @@ namespace ARMeilleure.Instructions
}
}
+ // VRINTA/M/N/P (floating-point).
public static void Vrint_RM(ArmEmitterContext context)
{
- OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32;
@@ -320,9 +323,10 @@ namespace ARMeilleure.Instructions
}
}
+ // VRINTZ (floating-point).
public static void Vrint_Z(ArmEmitterContext context)
{
- IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+ OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
if (Optimizations.UseSse2)
{
@@ -355,7 +359,7 @@ namespace ARMeilleure.Instructions
private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed)
{
// A port of the similar round function in InstEmitSimdCvt.
- OpCode32SimdS op = (OpCode32SimdS)context.CurrOp;
+ OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
int shift = doubleSize ? 1 : 2;
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index e045c601..a962c0fc 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -906,7 +906,7 @@ namespace ARMeilleure.Instructions
OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp;
bool doubleSize = (op.Size & 1) != 0;
- int shift = doubleSize ? 1 : 2;
+
Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1;
Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2;
diff --git a/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
index b484381f..52292242 100644
--- a/ARMeilleure/Instructions/InstEmitSimdMove32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdMove32.cs
@@ -559,7 +559,7 @@ namespace ARMeilleure.Instructions
}
}
- public static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
+ private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index d7283029..9e820f6b 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -563,6 +563,8 @@ namespace ARMeilleure.Instructions
Vdup,
Veor,
Vext,
+ Vfma,
+ Vfms,
Vld1,
Vld2,
Vld3,
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index 2ff98f85..ccb3f705 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -20,7 +20,7 @@ namespace ARMeilleure.Translation.PTC
{
private const string HeaderMagic = "PTChd";
- private const int InternalVersion = 20; //! To be incremented manually for each change to the ARMeilleure project.
+ private const int InternalVersion = 1471; //! To be incremented manually for each change to the ARMeilleure project.
private const string BaseDir = "Ryujinx";