author     gdkchan <gab.dark.100@gmail.com>  2020-03-10 21:49:27 -0300
committer  GitHub <noreply@github.com>       2020-03-11 11:49:27 +1100
commit     c26f3774bdbf3982149a3ea4c0f7abb4de869db7
tree       45805ff76e7a4f486d5132d39ec7f901f462adcb /ARMeilleure/Instructions
parent     89ccec197ec9a5db2bb308ef3e9178910d1ab7a8
Implement VMULL, VMLSL, VRSHR, VQRSHRN, VQRSHRUN AArch32 instructions + other fixes (#977)
* Implement VMULL, VMLSL, VQRSHRN, VQRSHRUN AArch32 instructions plus other fixes
* Re-align opcode table
* Re-enable undefined, use subclasses to fix checks
* Add test and fix VRSHR instruction
* PR feedback
Diffstat (limited to 'ARMeilleure/Instructions')
-rw-r--r--  ARMeilleure/Instructions/InstEmitAlu32.cs            |   2
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs | 203
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdHelper32.cs     |  95
-rw-r--r--  ARMeilleure/Instructions/InstEmitSimdShift32.cs      | 182
-rw-r--r--  ARMeilleure/Instructions/InstName.cs                 |   5
5 files changed, 399 insertions, 88 deletions
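
For orientation before the diff: the new "long" instructions produce double-width results. A minimal scalar sketch of the intended semantics (illustrative only, not the emitter code; the helper names are hypothetical):

    // VMULL: widening multiply; VMLSL: multiply-subtract-long (signed 32 -> 64 shown).
    static long Vmull(int n, int m) => (long)n * m;
    static long Vmlsl(long d, int n, int m) => d - (long)n * m;

Per element, the emitters below build the same dataflow in the IR, with signedness chosen by the U bit.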
diff --git a/ARMeilleure/Instructions/InstEmitAlu32.cs b/ARMeilleure/Instructions/InstEmitAlu32.cs
index 469a18a0..d57ff0b6 100644
--- a/ARMeilleure/Instructions/InstEmitAlu32.cs
+++ b/ARMeilleure/Instructions/InstEmitAlu32.cs
@@ -128,8 +128,6 @@ namespace ARMeilleure.Instructions
public static void Cmp(ArmEmitterContext context)
{
- IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
-
Operand n = GetAluN(context);
Operand m = GetAluM(context, setCarry: false);
diff --git a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
index 73f25b98..19a10f68 100644
--- a/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs
@@ -2,6 +2,7 @@
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
+using System.Diagnostics;
using static ARMeilleure.Instructions.InstEmitFlowHelper;
using static ARMeilleure.Instructions.InstEmitHelper;
@@ -113,20 +114,13 @@ namespace ARMeilleure.Instructions
Operand insert = GetIntA32(context, op.Rt);
// Zero extend into an I64, then replicate. Saves the most time over elementwise inserts.
- switch (op.Size)
+ insert = op.Size switch
{
- case 2:
- insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u));
- break;
- case 1:
- insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u));
- break;
- case 0:
- insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u));
- break;
- default:
- throw new InvalidOperationException("Unknown Vdup Size.");
- }
+ 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
+ 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
+ 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
+ _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".")
+ };
InsertScalar(context, op.Vd, insert);
if (op.Q)
@@ -142,20 +136,13 @@ namespace ARMeilleure.Instructions
Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size);
// Zero extend into an I64, then replicate. Saves the most time over elementwise inserts.
- switch (op.Size)
+ insert = op.Size switch
{
- case 2:
- insert = context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u));
- break;
- case 1:
- insert = context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u));
- break;
- case 0:
- insert = context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u));
- break;
- default:
- throw new InvalidOperationException("Unknown Vdup Size.");
- }
+ 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)),
+ 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)),
+ 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)),
+ _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".")
+ };
InsertScalar(context, op.Vd, insert);
if (op.Q)
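
The replication constants above are a multiply-broadcast trick: multiplying a zero-extended element by a mask with a 1 in every lane copies it across all 64 bits. A quick standalone illustration (plain C#, nothing assumed beyond integer arithmetic):

    // 0xAB * 0x0101010101010101 == 0xABABABABABABABAB, and likewise per lane width.
    static ulong ReplicateU8(byte b)    => b * 0x0101010101010101UL;
    static ulong ReplicateU16(ushort h) => h * 0x0001000100010001UL;
    static ulong ReplicateU32(uint w)   => w * 0x0000000100000001UL;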
@@ -575,51 +562,53 @@ namespace ARMeilleure.Instructions
}
}
- public static void Vmul_S(ArmEmitterContext context)
+ public static void Vmla_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
}
else if (Optimizations.FastFP)
{
- EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ {
+ return context.Add(op1, context.Multiply(op2, op3));
+ });
}
else
{
- EmitScalarBinaryOpF32(context, (op1, op2) =>
+ EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
- return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
+ return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
});
}
}
- public static void Vmul_V(ArmEmitterContext context)
+ public static void Vmla_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
}
else if (Optimizations.FastFP)
{
- EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
}
else
{
- EmitVectorBinaryOpF32(context, (op1, op2) =>
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
{
- return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2);
+ return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3);
});
}
}
- public static void Vmul_I(ArmEmitterContext context)
+ public static void Vmla_I(ArmEmitterContext context)
{
- if ((context.CurrOp as OpCode32SimdReg).U) throw new NotImplementedException("Polynomial mode not implemented");
- EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2));
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
}
- public static void Vmul_1(ArmEmitterContext context)
+ public static void Vmla_1(ArmEmitterContext context)
{
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
@@ -627,70 +616,70 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
+ EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
}
else if (Optimizations.FastFP)
{
- EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2));
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
}
else
{
- EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2));
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3));
}
}
else
{
- EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false);
+ EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false);
}
}
- public static void Vmla_S(ArmEmitterContext context)
+ public static void Vmls_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd);
+ EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
}
else if (Optimizations.FastFP)
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
- return context.Add(op1, context.Multiply(op2, op3));
+ return context.Subtract(op1, context.Multiply(op2, op3));
});
}
else
{
EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
{
- return EmitSoftFloatCall(context, SoftFloat32.FPMulAdd, SoftFloat64.FPMulAdd, op1, op2, op3);
+ return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
});
}
}
- public static void Vmla_V(ArmEmitterContext context)
+ public static void Vmls_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
}
else if (Optimizations.FastFP)
{
- EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
}
else
{
EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
{
- return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3);
+ return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3);
});
}
}
- public static void Vmla_I(ArmEmitterContext context)
+ public static void Vmls_I(ArmEmitterContext context)
{
- EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
}
- public static void Vmla_1(ArmEmitterContext context)
+ public static void Vmls_1(ArmEmitterContext context)
{
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
@@ -698,70 +687,83 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd);
+ EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
}
else if (Optimizations.FastFP)
{
- EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)));
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
}
else
{
- EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulAddFpscr, SoftFloat64.FPMulAddFpscr, op1, op2, op3));
+ EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3));
}
}
else
{
- EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false);
+ EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false);
}
}
- public static void Vmls_S(ArmEmitterContext context)
+ public static void Vmlsl_I(ArmEmitterContext context)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ EmitVectorTernaryLongOpI32(context, (opD, op1, op2) => context.Subtract(opD, context.Multiply(op1, op2)), !op.U);
+ }
+
+ public static void Vmul_S(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd);
+ EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd);
}
else if (Optimizations.FastFP)
{
- EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
- {
- return context.Subtract(op1, context.Multiply(op2, op3));
- });
+ EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
}
else
{
- EmitScalarTernaryOpF32(context, (op1, op2, op3) =>
+ EmitScalarBinaryOpF32(context, (op1, op2) =>
{
- return EmitSoftFloatCall(context, SoftFloat32.FPMulSub, SoftFloat64.FPMulSub, op1, op2, op3);
+ return EmitSoftFloatCall(context, SoftFloat32.FPMul, SoftFloat64.FPMul, op1, op2);
});
}
}
- public static void Vmls_V(ArmEmitterContext context)
+ public static void Vmul_V(ArmEmitterContext context)
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
}
else if (Optimizations.FastFP)
{
- EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2));
}
else
{
- EmitVectorTernaryOpF32(context, (op1, op2, op3) =>
+ EmitVectorBinaryOpF32(context, (op1, op2) =>
{
- return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3);
+ return EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2);
});
}
}
- public static void Vmls_I(ArmEmitterContext context)
+ public static void Vmul_I(ArmEmitterContext context)
{
- EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ if (op.U) // This instruction is always signed, U indicates polynomial mode.
+ {
+ EmitVectorBinaryOpZx32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size));
+ }
+ else
+ {
+ EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2));
+ }
}
- public static void Vmls_1(ArmEmitterContext context)
+ public static void Vmul_1(ArmEmitterContext context)
{
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
@@ -769,20 +771,41 @@ namespace ARMeilleure.Instructions
{
if (Optimizations.FastFP && Optimizations.UseSse2)
{
- EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd);
+ EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd);
}
else if (Optimizations.FastFP)
{
- EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)));
+ EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2));
}
else
{
- EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulSubFpscr, SoftFloat64.FPMulSubFpscr, op1, op2, op3));
+ EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, SoftFloat32.FPMulFpscr, SoftFloat64.FPMulFpscr, op1, op2));
}
}
else
{
- EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false);
+ EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false);
+ }
+ }
+
+ public static void Vmull_1(ArmEmitterContext context)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ EmitVectorByScalarLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
+ }
+
+ public static void Vmull_I(ArmEmitterContext context)
+ {
+ OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp;
+
+ if (op.Polynomial)
+ {
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false);
+ }
+ else
+ {
+ EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U);
}
}
@@ -1157,5 +1180,27 @@ namespace ARMeilleure.Instructions
EmitVectorBinaryOpSimd32(context, genericEmit);
}
}
+
+ private static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize)
+ {
+ Debug.Assert(eSize <= 32);
+
+ Operand result = eSize == 32 ? Const(0L) : Const(0);
+
+ if (eSize == 32)
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op2 = context.ZeroExtend32(OperandType.I64, op2);
+ }
+
+ for (int i = 0; i < eSize; i++)
+ {
+ Operand mask = context.BitwiseAnd(op1, Const(op1.Type, 1L << i));
+
+ result = context.BitwiseExclusiveOr(result, context.Multiply(op2, mask));
+ }
+
+ return result;
+ }
}
}
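
EmitPolynomialMultiply above is a carry-less (GF(2)) multiply: for each set bit i of op1 it folds op2 << i into the accumulator with XOR. Because the mask op1 & (1 << i) is either zero or a single bit, the Multiply inside the loop is effectively that conditional shift. A scalar sketch of the same scheme (hypothetical helper, assuming eSize <= 32 so the product fits in 64 bits):

    static ulong PolyMul(ulong a, ulong b, int eSize)
    {
        ulong result = 0;

        for (int i = 0; i < eSize; i++)
        {
            if (((a >> i) & 1) != 0)
            {
                result ^= b << i; // XOR replaces add: no carries between bit positions
            }
        }

        return result;
    }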
diff --git a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
index d0114e7b..67dc25c7 100644
--- a/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdHelper32.cs
@@ -57,7 +57,6 @@ namespace ARMeilleure.Instructions
// From dreg.
vec = GetVecA32(reg >> 1);
insert = context.VectorInsert(vec, value, reg & 1);
-
}
else
{
@@ -69,6 +68,11 @@ namespace ARMeilleure.Instructions
context.Copy(vec, insert);
}
+ public static Operand ExtractElement(ArmEmitterContext context, int reg, int size, bool signed)
+ {
+ return EmitVectorExtract32(context, reg >> (4 - size), reg & ((16 >> size) - 1), size, signed);
+ }
+
public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit)
{
IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp;
@@ -250,6 +254,57 @@ namespace ARMeilleure.Instructions
context.Copy(GetVecA32(op.Qd), res);
}
+ public static void EmitVectorBinaryLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed)
+ {
+ OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed);
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+ Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me);
+ }
+
+ res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed)
{
OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp;
@@ -330,7 +385,7 @@ namespace ARMeilleure.Instructions
{
OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
- Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed);
+ Operand m = ExtractElement(context, op.Vm, op.Size, signed);
Operand res = GetVecA32(op.Qd);
@@ -340,7 +395,37 @@ namespace ARMeilleure.Instructions
{
Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
- res = EmitVectorInsert(context, res, emit(ne, m), op.In + index, op.Size);
+ res = EmitVectorInsert(context, res, emit(ne, m), op.Id + index, op.Size);
+ }
+
+ context.Copy(GetVecA32(op.Qd), res);
+ }
+
+ public static void EmitVectorByScalarLongOpI32(ArmEmitterContext context, Func2I emit, bool signed)
+ {
+ OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp;
+
+ Operand m = ExtractElement(context, op.Vm, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ m = signed ? context.SignExtend32(OperandType.I64, m) : context.ZeroExtend32(OperandType.I64, m);
+ }
+
+ Operand res = context.VectorZero();
+
+ int elems = op.GetBytesCount() >> op.Size;
+
+ for (int index = 0; index < elems; index++)
+ {
+ Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed);
+
+ if (op.Size == 2)
+ {
+ ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne);
+ }
+
+ res = EmitVectorInsert(context, res, emit(ne, m), index, op.Size + 1);
}
context.Copy(GetVecA32(op.Qd), res);
@@ -454,7 +539,7 @@ namespace ARMeilleure.Instructions
// Narrow
- public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit)
+ public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false)
{
OpCode32Simd op = (OpCode32Simd)context.CurrOp;
@@ -465,7 +550,7 @@ namespace ARMeilleure.Instructions
for (int index = 0; index < elems; index++)
{
- Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, false);
+ Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, signed);
res = EmitVectorInsert(context, res, emit(m), id + index, op.Size);
}
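
The index math in the new ExtractElement helper flattens the AArch32 register file: with 16 >> size elements per Q register, reg >> (4 - size) selects the Q register and reg & ((16 >> size) - 1) the element within it. A worked sketch (hypothetical helper, same arithmetic):

    // size == 1 (16-bit elements, 8 per Q register):
    //   reg = 21  ->  Q register 21 >> 3 == 2, element 21 & 7 == 5
    //   (element 5 of Q2, i.e. element 1 of D5).
    static (int QReg, int Elem) Locate(int reg, int size)
        => (reg >> (4 - size), reg & ((16 >> size) - 1));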
diff --git a/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
index 89385476..e57c92a3 100644
--- a/ARMeilleure/Instructions/InstEmitSimdShift32.cs
+++ b/ARMeilleure/Instructions/InstEmitSimdShift32.cs
@@ -1,5 +1,6 @@
using ARMeilleure.Decoders;
using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
@@ -11,6 +12,78 @@ namespace ARMeilleure.Instructions
{
static partial class InstEmit32
{
+ public static void Vqrshrn(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ EmitRoundShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx);
+ }
+
+ public static void Vqrshrun(ArmEmitterContext context)
+ {
+ EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx);
+ }
+
+ public static void Vrshr(ArmEmitterContext context)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ if (op.U)
+ {
+ if (op.Size < 2)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ShiftRightUI(op1, Const(shift));
+ });
+ }
+ else if (op.Size == 2)
+ {
+ EmitVectorUnaryOpZx32(context, (op1) =>
+ {
+ op1 = context.ZeroExtend32(OperandType.I64, op1);
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ConvertI64ToI32(context.ShiftRightUI(op1, Const(shift)));
+ });
+ }
+ else /* if (op.Size == 3) */
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: false, roundConst, shift));
+ }
+ }
+ else
+ {
+ if (op.Size < 2)
+ {
+ EmitVectorUnaryOpSx32(context, (op1) =>
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ShiftRightSI(op1, Const(shift));
+ });
+ }
+ else if (op.Size == 2)
+ {
+ EmitVectorUnaryOpSx32(context, (op1) =>
+ {
+ op1 = context.SignExtend32(OperandType.I64, op1);
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+
+ return context.ConvertI64ToI32(context.ShiftRightSI(op1, Const(shift)));
+ });
+ }
+ else /* if (op.Size == 3) */
+ {
+ EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: true, roundConst, shift));
+ }
+ }
+ }
+
public static void Vshl(ArmEmitterContext context)
{
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
@@ -35,7 +108,7 @@ namespace ARMeilleure.Instructions
public static void Vshr(ArmEmitterContext context)
{
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
- int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped.
+ int shift = GetImmShr(op);
int maxShift = (8 << op.Size) - 1;
if (op.U)
@@ -51,7 +124,7 @@ namespace ARMeilleure.Instructions
public static void Vshrn(ArmEmitterContext context)
{
OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
- int shift = (8 << op.Size) - op.Shift; // Shr amount is flipped.
+ int shift = GetImmShr(op);
EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift)));
}
@@ -96,5 +169,110 @@ namespace ARMeilleure.Instructions
return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res));
}
}
+
+ [Flags]
+ private enum ShrImmSaturatingNarrowFlags
+ {
+ Scalar = 1 << 0,
+ SignedSrc = 1 << 1,
+ SignedDst = 1 << 2,
+
+ Round = 1 << 3,
+
+ ScalarSxSx = Scalar | SignedSrc | SignedDst,
+ ScalarSxZx = Scalar | SignedSrc,
+ ScalarZxZx = Scalar,
+
+ VectorSxSx = SignedSrc | SignedDst,
+ VectorSxZx = SignedSrc,
+ VectorZxZx = 0
+ }
+
+ private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags);
+ }
+
+ private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags)
+ {
+ OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp;
+
+ bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0;
+ bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0;
+ bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0;
+ bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0;
+
+ if (scalar)
+ {
+ // TODO: Support scalar operation.
+ throw new NotImplementedException();
+ }
+
+ int shift = GetImmShr(op);
+ long roundConst = 1L << (shift - 1);
+
+ EmitVectorUnaryNarrowOp32(context, (op1) =>
+ {
+ if (op.Size <= 1 || !round)
+ {
+ if (round)
+ {
+ op1 = context.Add(op1, Const(op1.Type, roundConst));
+ }
+
+ op1 = signedSrc ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift));
+ }
+ else /* if (op.Size == 2 && round) */
+ {
+ op1 = EmitShrImm64(context, op1, signedSrc, roundConst, shift); // shift <= 32
+ }
+
+ return EmitSatQ(context, op1, 8 << op.Size, signedDst);
+ }, signedSrc);
+ }
+
+ private static int GetImmShr(OpCode32SimdShImm op)
+ {
+ return (8 << op.Size) - op.Shift; // Shr amount is flipped.
+ }
+
+ // dst64 = (Int(src64, signed) + roundConst) >> shift;
+ private static Operand EmitShrImm64(
+ ArmEmitterContext context,
+ Operand value,
+ bool signed,
+ long roundConst,
+ int shift)
+ {
+ Delegate dlg = signed
+ ? (Delegate)new _S64_S64_S64_S32(SoftFallback.SignedShrImm64)
+ : (Delegate)new _U64_U64_S64_S32(SoftFallback.UnsignedShrImm64);
+
+ return context.Call(dlg, value, Const(roundConst), Const(shift));
+ }
+
+ private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signed)
+ {
+ Debug.Assert(eSize <= 32);
+
+ long intMin = signed ? -(1L << (eSize - 1)) : 0;
+ long intMax = signed ? (1L << (eSize - 1)) - 1 : (1L << eSize) - 1;
+
+ Operand gt = context.ICompareGreater(value, Const(value.Type, intMax));
+ Operand lt = context.ICompareLess(value, Const(value.Type, intMin));
+
+ value = context.ConditionalSelect(gt, Const(value.Type, intMax), value);
+ value = context.ConditionalSelect(lt, Const(value.Type, intMin), value);
+
+ Operand lblNoSat = Label();
+
+ context.BranchIfFalse(lblNoSat, context.BitwiseOr(gt, lt));
+
+ // TODO: Set QC (to 1) on FPSCR here.
+
+ context.MarkLabel(lblNoSat);
+
+ return value;
+ }
}
}
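
The saturating-narrow path combines three steps visible above: GetImmShr recovers the real shift (the encoded amount counts down from the element size), the round constant implements round-half-up before the shift, and EmitSatQ clamps to the destination range. A scalar sketch for the signed-source, unsigned-destination case (VQRSHRUN), ignoring the 64-bit overflow that EmitShrImm64 handles via a soft-fallback call; eSize is the destination width, as in EmitSatQ:

    static ulong QRShrUn(long value, int shift, int eSize)
    {
        long rounded = (value + (1L << (shift - 1))) >> shift; // round to nearest
        long max = (1L << eSize) - 1;

        if (rounded > max) return (ulong)max; // saturate high; the real code would also set FPSCR.QC
        if (rounded < 0)   return 0;          // saturate low

        return (ulong)rounded;
    }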
diff --git a/ARMeilleure/Instructions/InstName.cs b/ARMeilleure/Instructions/InstName.cs
index 69969e9f..5e92da0a 100644
--- a/ARMeilleure/Instructions/InstName.cs
+++ b/ARMeilleure/Instructions/InstName.cs
@@ -560,12 +560,14 @@ namespace ARMeilleure.Instructions
Vminnm,
Vmla,
Vmls,
+ Vmlsl,
Vmov,
Vmovl,
Vmovn,
Vmrs,
Vmsr,
Vmul,
+ Vmull,
Vmvn,
Vneg,
Vnmul,
@@ -573,8 +575,11 @@ namespace ARMeilleure.Instructions
Vnmls,
Vorr,
Vpadd,
+ Vqrshrn,
+ Vqrshrun,
Vrev,
Vrint,
+ Vrshr,
Vsel,
Vshl,
Vshr,