aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure
diff options
context:
space:
mode:
authorLDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com>2020-07-13 13:08:47 +0200
committerGitHub <noreply@github.com>2020-07-13 21:08:47 +1000
commita804db6eed016a8a1f152c2837fc7b65e50f02df (patch)
tree5ac490b42ba8fc2c22038bb784de4ea3fcda352f /ARMeilleure
parentd7044b10a253dae31b9a0041a432e3a7adce59f6 (diff)
Add Fmax/minv_V & S/Ushl_S Inst.s with Tests. Fix Maxps/d & Minps/d d… (#1335)
* Add Fmax/minv_V & S/Ushl_S Inst.s with Tests. Fix Maxps/d & Minps/d double zero sign handling. Allows better handling of NaNs. * Optimized EmitSse2VectorIsNaNOpF() for multiple uses per opF.
Diffstat (limited to 'ARMeilleure')
-rw-r--r--ARMeilleure/Decoders/OpCodeTable.cs6
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdArithmetic.cs356
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs4
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdHelper.cs45
-rw-r--r--ARMeilleure/Instructions/InstEmitSimdShift.cs98
-rw-r--r--ARMeilleure/Instructions/InstName.cs6
-rw-r--r--ARMeilleure/Translation/PTC/Ptc.cs4
7 files changed, 419 insertions, 100 deletions
diff --git a/ARMeilleure/Decoders/OpCodeTable.cs b/ARMeilleure/Decoders/OpCodeTable.cs
index c1632d46..b98fcab1 100644
--- a/ARMeilleure/Decoders/OpCodeTable.cs
+++ b/ARMeilleure/Decoders/OpCodeTable.cs
@@ -332,14 +332,18 @@ namespace ARMeilleure.Decoders
SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, typeof(OpCodeSimdReg));
SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, typeof(OpCodeSimdReg));
SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, typeof(OpCodeSimdReg));
+ SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, typeof(OpCodeSimdReg));
SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, typeof(OpCodeSimd));
SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, typeof(OpCodeSimdReg));
+ SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, typeof(OpCodeSimd));
SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, typeof(OpCodeSimdReg));
SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, typeof(OpCodeSimdReg));
SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, typeof(OpCodeSimdReg));
SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, typeof(OpCodeSimdReg));
+ SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, typeof(OpCodeSimdReg));
SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, typeof(OpCodeSimd));
SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, typeof(OpCodeSimdReg));
+ SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, typeof(OpCodeSimd));
SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, typeof(OpCodeSimdRegElemF));
SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, typeof(OpCodeSimdReg));
SetA64("0>0011111<xxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Ve, InstEmit.Fmla_Ve, typeof(OpCodeSimdRegElemF));
@@ -529,6 +533,7 @@ namespace ARMeilleure.Decoders
SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, InstEmit.Srsra_S, typeof(OpCodeSimdShImm));
SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm));
SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, typeof(OpCodeSimdShImm));
+ SetA64("01011110111xxxxx010001xxxxxxxxxx", InstName.Sshl_S, InstEmit.Sshl_S, typeof(OpCodeSimdReg));
SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, typeof(OpCodeSimdReg));
SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, typeof(OpCodeSimdShImm));
SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, typeof(OpCodeSimdShImm));
@@ -611,6 +616,7 @@ namespace ARMeilleure.Decoders
SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, typeof(OpCodeSimdShImm));
SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm));
SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, typeof(OpCodeSimdShImm));
+ SetA64("01111110111xxxxx010001xxxxxxxxxx", InstName.Ushl_S, InstEmit.Ushl_S, typeof(OpCodeSimdReg));
SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, typeof(OpCodeSimdReg));
SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, typeof(OpCodeSimdShImm));
SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, typeof(OpCodeSimdShImm));
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
index b3041aac..0d417f70 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs
@@ -382,7 +382,14 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ Intrinsic addInst = (op.Size & 1) == 0 ? Intrinsic.X86Addps : Intrinsic.X86Addpd;
+
+ return context.AddIntrinsic(addInst, op1, op2);
+ });
}
else
{
@@ -468,9 +475,12 @@ namespace ARMeilleure.Instructions
public static void Fmax_S(ArmEmitterContext context)
{
- if (Optimizations.FastFP && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse41)
{
- EmitScalarBinaryOpF(context, Intrinsic.X86Maxss, Intrinsic.X86Maxsd);
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: true);
}
else
{
@@ -483,9 +493,12 @@ namespace ARMeilleure.Instructions
public static void Fmax_V(ArmEmitterContext context)
{
- if (Optimizations.FastFP && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse41)
{
- EmitVectorBinaryOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false);
}
else
{
@@ -526,19 +539,53 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Fmaxnmp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2);
+ });
+ }
+ }
+
public static void Fmaxnmv_V(ArmEmitterContext context)
{
- EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ if (Optimizations.FastFP && Optimizations.UseSse41)
{
- return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNum)), op1, op2);
- });
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNum)), op1, op2);
+ });
+ }
}
public static void Fmaxp_V(ArmEmitterContext context)
{
- if (Optimizations.FastFP && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse41)
{
- EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd);
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false, op1, op2);
+ });
}
else
{
@@ -549,11 +596,35 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Fmaxv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMax)), op1, op2);
+ });
+ }
+ }
+
public static void Fmin_S(ArmEmitterContext context)
{
- if (Optimizations.FastFP && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse41)
{
- EmitScalarBinaryOpF(context, Intrinsic.X86Minss, Intrinsic.X86Minsd);
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: true);
}
else
{
@@ -566,9 +637,12 @@ namespace ARMeilleure.Instructions
public static void Fmin_V(ArmEmitterContext context)
{
- if (Optimizations.FastFP && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse41)
{
- EmitVectorBinaryOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+ EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false);
}
else
{
@@ -609,19 +683,53 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Fminnmp_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2);
+ });
+ }
+ }
+
public static void Fminnmv_V(ArmEmitterContext context)
{
- EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ if (Optimizations.FastFP && Optimizations.UseSse41)
{
- return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNum)), op1, op2);
- });
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNum)), op1, op2);
+ });
+ }
}
public static void Fminp_V(ArmEmitterContext context)
{
- if (Optimizations.FastFP && Optimizations.UseSse2)
+ if (Optimizations.FastFP && Optimizations.UseSse41)
{
- EmitSse2VectorPairwiseOpF(context, Intrinsic.X86Minps, Intrinsic.X86Minpd);
+ EmitSse2VectorPairwiseOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false, op1, op2);
+ });
}
else
{
@@ -632,6 +740,27 @@ namespace ARMeilleure.Instructions
}
}
+ public static void Fminv_V(ArmEmitterContext context)
+ {
+ if (Optimizations.FastFP && Optimizations.UseSse41)
+ {
+ EmitSse2VectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false);
+ }, scalar: false, op1, op2);
+ });
+ }
+ else
+ {
+ EmitVectorAcrossVectorOpF(context, (op1, op2) =>
+ {
+ return context.Call(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMin)), op1, op2);
+ });
+ }
+ }
+
public static void Fmla_Se(ArmEmitterContext context) // Fused.
{
EmitScalarTernaryOpByElemF(context, (op1, op2, op3) =>
@@ -3111,7 +3240,12 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
- public static Operand EmitSse2VectorIsQNaNOpF(ArmEmitterContext context, Operand opF)
+ public static void EmitSse2VectorIsNaNOpF(
+ ArmEmitterContext context,
+ Operand opF,
+ out Operand qNaNMask,
+ out Operand sNaNMask,
+ bool? isQNaN = null)
{
IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
@@ -3126,7 +3260,8 @@ namespace ARMeilleure.Instructions
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal));
- return context.AddIntrinsic(Intrinsic.X86Andps, mask1, mask2);
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : null;
+ sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : null;
}
else /* if ((op.Size & 1) == 1) */
{
@@ -3139,67 +3274,202 @@ namespace ARMeilleure.Instructions
Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask);
mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal));
- return context.AddIntrinsic(Intrinsic.X86Andpd, mask1, mask2);
+ qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : null;
+ sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : null;
}
}
- private static void EmitSse41MaxMinNumOpF(ArmEmitterContext context, bool isMaxNum, bool scalar)
+ public static Operand EmitSse41ProcessNaNsOpF(
+ ArmEmitterContext context,
+ Func2I emit,
+ bool scalar,
+ Operand n = null,
+ Operand m = null)
{
- OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+ Operand nCopy = n ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn));
+ Operand mCopy = m ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm));
- Operand d = GetVec(op.Rd);
- Operand n = GetVec(op.Rn);
- Operand m = GetVec(op.Rm);
+ EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out Operand nSNaNMask);
+ EmitSse2VectorIsNaNOpF(context, mCopy, out _, out Operand mSNaNMask, isQNaN: false);
- Operand nNum = context.Copy(n);
- Operand mNum = context.Copy(m);
+ int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
- Operand nQNaNMask = EmitSse2VectorIsQNaNOpF(context, nNum);
- Operand mQNaNMask = EmitSse2VectorIsQNaNOpF(context, mNum);
+ if (sizeF == 0)
+ {
+ const int QBit = 22;
- int sizeF = op.Size & 1;
+ Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit);
+
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+
+ Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+ Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Blendvps, resNaN, emit(nCopy, mCopy), resMask);
+
+ if (n != null || m != null)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper96(res);
+ }
+ else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return null;
+ }
+ else /* if (sizeF == 1) */
+ {
+ const int QBit = 51;
+
+ Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit);
+
+ Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask);
+ resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask);
+
+ Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask);
+ resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask);
+
+ Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ));
+
+ Operand res = context.AddIntrinsic(Intrinsic.X86Blendvpd, resNaN, emit(nCopy, mCopy), resMask);
+
+ if (n != null || m != null)
+ {
+ return res;
+ }
+
+ if (scalar)
+ {
+ res = context.VectorZeroUpper64(res);
+ }
+
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return null;
+ }
+ }
+
+ private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax)
+ {
+ IOpCodeSimd op = (IOpCodeSimd)context.CurrOp;
+
+ if ((op.Size & 1) == 0)
+ {
+ Operand mask = X86GetAllElements(context, -0f);
+
+ Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res);
+
+ Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+ }
+ else /* if ((op.Size & 1) == 1) */
+ {
+ Operand mask = X86GetAllElements(context, -0d);
+
+ Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m);
+ res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res);
+
+ Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m);
+ resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign);
+
+ return context.AddIntrinsic(Intrinsic.X86Por, res, resSign);
+ }
+ }
+
+ private static Operand EmitSse41MaxMinNumOpF(
+ ArmEmitterContext context,
+ bool isMaxNum,
+ bool scalar,
+ Operand n = null,
+ Operand m = null)
+ {
+ Operand nCopy = n ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn));
+ Operand mCopy = m ?? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm));
+
+ EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out _, isQNaN: true);
+ EmitSse2VectorIsNaNOpF(context, mCopy, out Operand mQNaNMask, out _, isQNaN: true);
+
+ int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1;
if (sizeF == 0)
{
- Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
+ Operand negInfMask = scalar
+ ? X86GetScalar (context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity)
+ : X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity);
Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask);
Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask);
- nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask);
- mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask);
+ nCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, nCopy, negInfMask, nMask);
+ mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask);
+
+ Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+ }, scalar: scalar, nCopy, mCopy);
- Operand res = context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum);
+ if (n != null || m != null)
+ {
+ return res;
+ }
if (scalar)
{
res = context.VectorZeroUpper96(res);
}
- else if (op.RegisterSize == RegisterSize.Simd64)
+ else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64)
{
res = context.VectorZeroUpper64(res);
}
- context.Copy(d, res);
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return null;
}
else /* if (sizeF == 1) */
{
- Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
+ Operand negInfMask = scalar
+ ? X86GetScalar (context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity)
+ : X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity);
Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask);
Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask);
- nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask);
- mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask);
+ nCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, nCopy, negInfMask, nMask);
+ mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask);
- Operand res = context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum);
+ Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) =>
+ {
+ return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum);
+ }, scalar: scalar, nCopy, mCopy);
+
+ if (n != null || m != null)
+ {
+ return res;
+ }
if (scalar)
{
res = context.VectorZeroUpper64(res);
}
- context.Copy(d, res);
+ context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res);
+
+ return null;
}
}
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index 82f57d63..eb86ac9e 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -1200,8 +1200,8 @@ namespace ARMeilleure.Instructions
Operand nNum = context.Copy(n);
Operand mNum = context.Copy(m);
- Operand nQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, nNum);
- Operand mQNaNMask = InstEmit.EmitSse2VectorIsQNaNOpF(context, mNum);
+ InstEmit.EmitSse2VectorIsNaNOpF(context, nNum, out Operand nQNaNMask, out _, isQNaN: true);
+ InstEmit.EmitSse2VectorIsNaNOpF(context, mNum, out Operand mQNaNMask, out _, isQNaN: true);
int sizeF = op.Size & 1;
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
index 912c2260..69e79a6d 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper.cs
@@ -1095,6 +1095,29 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), d);
}
+ public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit)
+ {
+ OpCodeSimd op = (OpCodeSimd)context.CurrOp;
+
+ Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128);
+
+ const int sm0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0;
+ const int sm1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0;
+ const int sm2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0;
+ const int sm3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0;
+
+ Operand nCopy = context.Copy(GetVec(op.Rn));
+
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm1));
+ Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm2));
+ Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm3));
+
+ Operand res = emit(emit(part0, part1), emit(part2, part3));
+
+ context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res));
+ }
+
public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
@@ -1124,12 +1147,12 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
- public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64)
+ public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit)
{
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
- Operand n = GetVec(op.Rn);
- Operand m = GetVec(op.Rm);
+ Operand nCopy = context.Copy(GetVec(op.Rn));
+ Operand mCopy = context.Copy(GetVec(op.Rm));
int sizeF = op.Size & 1;
@@ -1137,32 +1160,32 @@ namespace ARMeilleure.Instructions
{
if (op.RegisterSize == RegisterSize.Simd64)
{
- Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m);
+ Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy);
Operand zero = context.VectorZero();
Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero);
Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck);
- context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
}
else /* if (op.RegisterSize == RegisterSize.Simd128) */
{
const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0;
const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0;
- Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm0));
- Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, n, m, Const(sm1));
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm0));
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm1));
- context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst32, part0, part1));
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
}
}
else /* if (sizeF == 1) */
{
- Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, n, m);
- Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, n, m);
+ Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy);
+ Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy);
- context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst64, part0, part1));
+ context.Copy(GetVec(op.Rd), emit(part0, part1));
}
}
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift.cs b/ARMeilleure/Instructions/InstEmitSimdShift.cs
index 0b3d85ae..62363fde 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift.cs
@@ -391,25 +391,14 @@ namespace ARMeilleure.Instructions
}
}
- public static void Sshl_V(ArmEmitterContext context)
+ public static void Sshl_S(ArmEmitterContext context)
{
- OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
-
- Operand res = context.VectorZero();
-
- int elems = op.GetBytesCount() >> op.Size;
-
- for (int index = 0; index < elems; index++)
- {
- Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size);
- Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size);
-
- Operand e = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShlReg)), ne, me, Const(0), Const(op.Size));
-
- res = EmitVectorInsert(context, res, e, index, op.Size);
- }
+ EmitSshlOrUshl(context, signed: true, scalar: true);
+ }
- context.Copy(GetVec(op.Rd), res);
+ public static void Sshl_V(ArmEmitterContext context)
+ {
+ EmitSshlOrUshl(context, signed: true, scalar: false);
}
public static void Sshll_V(ArmEmitterContext context)
@@ -686,25 +675,14 @@ namespace ARMeilleure.Instructions
}
}
- public static void Ushl_V(ArmEmitterContext context)
+ public static void Ushl_S(ArmEmitterContext context)
{
- OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
-
- Operand res = context.VectorZero();
-
- int elems = op.GetBytesCount() >> op.Size;
-
- for (int index = 0; index < elems; index++)
- {
- Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size);
- Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, 0);
-
- Operand e = EmitUnsignedShlRegOp(context, ne, context.ConvertI64ToI32(me), op.Size);
-
- res = EmitVectorInsert(context, res, e, index, op.Size);
- }
+ EmitSshlOrUshl(context, signed: false, scalar: true);
+ }
- context.Copy(GetVec(op.Rd), res);
+ public static void Ushl_V(ArmEmitterContext context)
+ {
+ EmitSshlOrUshl(context, signed: false, scalar: false);
}
public static void Ushll_V(ArmEmitterContext context)
@@ -894,7 +872,7 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
- private static Operand EmitUnsignedShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size)
+ private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool signed)
{
Debug.Assert(op.Type == OperandType.I64);
Debug.Assert(shiftLsB.Type == OperandType.I32);
@@ -902,18 +880,33 @@ namespace ARMeilleure.Instructions
Operand negShiftLsB = context.Negate(shiftLsB);
+ Operand isInRange = context.BitwiseAnd(
+ context.ICompareLess(shiftLsB, Const(8 << size)),
+ context.ICompareLess(negShiftLsB, Const(8 << size)));
+
Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0));
- Operand shl = context.ShiftLeft (op, shiftLsB);
- Operand shr = context.ShiftRightUI(op, negShiftLsB);
+ Operand shl = context.ShiftLeft(op, shiftLsB);
+
+ Operand sarOrShr = signed
+ ? context.ShiftRightSI(op, negShiftLsB)
+ : context.ShiftRightUI(op, negShiftLsB);
- Operand res = context.ConditionalSelect(isPositive, shl, shr);
+ Operand res = context.ConditionalSelect(isPositive, shl, sarOrShr);
- Operand isOutOfRange = context.BitwiseOr(
- context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)),
- context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)));
+ if (signed)
+ {
+ Operand isPositive2 = context.ICompareGreaterOrEqual(op, Const(0L));
+
+ Operand res2 = context.ConditionalSelect(isPositive2, Const(0L), Const(-1L));
+ res2 = context.ConditionalSelect(isPositive, Const(0L), res2);
- return context.ConditionalSelect(isOutOfRange, Const(0UL), res);
+ return context.ConditionalSelect(isInRange, res, res2);
+ }
+ else
+ {
+ return context.ConditionalSelect(isInRange, res, Const(0UL));
+ }
}
private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round)
@@ -1174,5 +1167,26 @@ namespace ARMeilleure.Instructions
context.Copy(GetVec(op.Rd), res);
}
}
+
+ private static void EmitSshlOrUshl(ArmEmitterContext context, bool signed, bool scalar)
+ {
+ OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = !scalar ? op.GetBytesCount() >> op.Size : 1;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract (context, op.Rn, index, op.Size, signed);
+ Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, 0);
+
+ Operand e = EmitShlRegOp(context, ne, context.ConvertI64ToI32(me), op.Size, signed);
+
+ res = EmitVectorInsert(context, res, e, index, op.Size);
+ }
+
+ context.Copy(GetVec(op.Rd), res);
+ }
}
}
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index e4d08456..69b5d3fc 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -212,14 +212,18 @@ namespace ARMeilleure.Instructions
Fmax_V,
Fmaxnm_S,
Fmaxnm_V,
+ Fmaxnmp_V,
Fmaxnmv_V,
Fmaxp_V,
+ Fmaxv_V,
Fmin_S,
Fmin_V,
Fminnm_S,
Fminnm_V,
+ Fminnmp_V,
Fminnmv_V,
Fminp_V,
+ Fminv_V,
Fmla_Se,
Fmla_V,
Fmla_Ve,
@@ -378,6 +382,7 @@ namespace ARMeilleure.Instructions
Srshr_V,
Srsra_S,
Srsra_V,
+ Sshl_S,
Sshl_V,
Sshll_V,
Sshr_S,
@@ -444,6 +449,7 @@ namespace ARMeilleure.Instructions
Urshr_V,
Ursra_S,
Ursra_V,
+ Ushl_S,
Ushl_V,
Ushll_V,
Ushr_S,
diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs
index deffabe1..b951caf8 100644
--- a/ARMeilleure/Translation/PTC/Ptc.cs
+++ b/ARMeilleure/Translation/PTC/Ptc.cs
@@ -19,8 +19,8 @@ namespace ARMeilleure.Translation.PTC
public static class Ptc
{
private const string HeaderMagic = "PTChd";
-
- private const int InternalVersion = 9; //! To be incremented manually for each change to the ARMeilleure project.
+
+ private const int InternalVersion = 10; //! To be incremented manually for each change to the ARMeilleure project.
private const string BaseDir = "Ryujinx";