aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs67
-rw-r--r--ChocolArm64/Instruction/AInstEmitSimdCmp.cs20
-rw-r--r--ChocolArm64/Instruction/AInstEmitSimdCvt.cs45
-rw-r--r--ChocolArm64/Instruction/AInstEmitSimdHelper.cs214
-rw-r--r--ChocolArm64/Instruction/AInstEmitSimdLogical.cs156
-rw-r--r--ChocolArm64/Instruction/AInstEmitSimdMove.cs200
-rw-r--r--ChocolArm64/Instruction/AVectorHelper.cs420
-rw-r--r--ChocolArm64/Memory/AMemory.cs8
8 files changed, 875 insertions, 255 deletions
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index be549875..811730fc 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -4,6 +4,7 @@ using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
+using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@@ -31,7 +32,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse2)
{
- EmitSse2Call(Context, nameof(Sse2.Add));
+ EmitSse2Op(Context, nameof(Sse2.Add));
}
else
{
@@ -175,7 +176,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitScalarSseOrSse2CallF(Context, nameof(Sse.AddScalar));
+ EmitScalarSseOrSse2OpF(Context, nameof(Sse.AddScalar));
}
else
{
@@ -187,7 +188,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitVectorSseOrSse2CallF(Context, nameof(Sse.Add));
+ EmitVectorSseOrSse2OpF(Context, nameof(Sse.Add));
}
else
{
@@ -218,7 +219,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitScalarSseOrSse2CallF(Context, nameof(Sse.DivideScalar));
+ EmitScalarSseOrSse2OpF(Context, nameof(Sse.DivideScalar));
}
else
{
@@ -230,7 +231,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitVectorSseOrSse2CallF(Context, nameof(Sse.Divide));
+ EmitVectorSseOrSse2OpF(Context, nameof(Sse.Divide));
}
else
{
@@ -240,11 +241,49 @@ namespace ChocolArm64.Instruction
public static void Fmadd_S(AILEmitterCtx Context)
{
- EmitScalarTernaryRaOpF(Context, () =>
+ if (AOptimizations.UseSse2)
{
- Context.Emit(OpCodes.Mul);
- Context.Emit(OpCodes.Add);
- });
+ AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
+
+ if (Op.Size == 0)
+ {
+ Context.EmitLdvec(Op.Ra);
+ Context.EmitLdvec(Op.Rn);
+ Context.EmitLdvec(Op.Rm);
+
+ Type[] Types = new Type[] { typeof(Vector128<float>), typeof(Vector128<float>) };
+
+ Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.MultiplyScalar), Types));
+ Context.EmitCall(typeof(Sse).GetMethod(nameof(Sse.AddScalar), Types));
+
+ Context.EmitStvec(Op.Rd);
+
+ EmitVectorZero32_128(Context, Op.Rd);
+ }
+ else /* if (Op.Size == 1) */
+ {
+ EmitLdvecWithCastToDouble(Context, Op.Ra);
+ EmitLdvecWithCastToDouble(Context, Op.Rn);
+ EmitLdvecWithCastToDouble(Context, Op.Rm);
+
+ Type[] Types = new Type[] { typeof(Vector128<double>), typeof(Vector128<double>) };
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MultiplyScalar), Types));
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AddScalar), Types));
+
+ EmitStvecWithCastFromDouble(Context, Op.Rd);
+
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitScalarTernaryRaOpF(Context, () =>
+ {
+ Context.Emit(OpCodes.Mul);
+ Context.Emit(OpCodes.Add);
+ });
+ }
}
public static void Fmax_S(AILEmitterCtx Context)
@@ -379,7 +418,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitScalarSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar));
+ EmitScalarSseOrSse2OpF(Context, nameof(Sse.MultiplyScalar));
}
else
{
@@ -396,7 +435,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitVectorSseOrSse2CallF(Context, nameof(Sse.Multiply));
+ EmitVectorSseOrSse2OpF(Context, nameof(Sse.Multiply));
}
else
{
@@ -763,7 +802,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitScalarSseOrSse2CallF(Context, nameof(Sse.SubtractScalar));
+ EmitScalarSseOrSse2OpF(Context, nameof(Sse.SubtractScalar));
}
else
{
@@ -775,7 +814,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitVectorSseOrSse2CallF(Context, nameof(Sse.Subtract));
+ EmitVectorSseOrSse2OpF(Context, nameof(Sse.Subtract));
}
else
{
@@ -1103,7 +1142,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse2)
{
- EmitSse2Call(Context, nameof(Sse2.Subtract));
+ EmitSse2Op(Context, nameof(Sse2.Subtract));
}
else
{
diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
index 6357396d..97f7623f 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
@@ -23,11 +23,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size < 3 && AOptimizations.UseSse2)
{
- EmitSse2Call(Context, nameof(Sse2.CompareEqual));
+ EmitSse2Op(Context, nameof(Sse2.CompareEqual));
}
else if (Op.Size == 3 && AOptimizations.UseSse41)
{
- EmitSse41Call(Context, nameof(Sse41.CompareEqual));
+ EmitSse41Op(Context, nameof(Sse41.CompareEqual));
}
else
{
@@ -61,11 +61,11 @@ namespace ChocolArm64.Instruction
{
if (Op.Size < 3 && AOptimizations.UseSse2)
{
- EmitSse2Call(Context, nameof(Sse2.CompareGreaterThan));
+ EmitSse2Op(Context, nameof(Sse2.CompareGreaterThan));
}
else if (Op.Size == 3 && AOptimizations.UseSse42)
{
- EmitSse42Call(Context, nameof(Sse42.CompareGreaterThan));
+ EmitSse42Op(Context, nameof(Sse42.CompareGreaterThan));
}
else
{
@@ -158,7 +158,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar));
+ EmitScalarSseOrSse2OpF(Context, nameof(Sse.CompareEqualScalar));
}
else
{
@@ -171,7 +171,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareEqual));
+ EmitVectorSseOrSse2OpF(Context, nameof(Sse.CompareEqual));
}
else
{
@@ -184,7 +184,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar));
+ EmitScalarSseOrSse2OpF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar));
}
else
{
@@ -197,7 +197,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual));
+ EmitVectorSseOrSse2OpF(Context, nameof(Sse.CompareGreaterThanOrEqual));
}
else
{
@@ -210,7 +210,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar));
+ EmitScalarSseOrSse2OpF(Context, nameof(Sse.CompareGreaterThanScalar));
}
else
{
@@ -223,7 +223,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan));
+ EmitVectorSseOrSse2OpF(Context, nameof(Sse.CompareGreaterThan));
}
else
{
diff --git a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
index 231de0af..76d984a2 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCvt.cs
@@ -3,6 +3,8 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@@ -14,11 +16,48 @@ namespace ChocolArm64.Instruction
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
- EmitVectorExtractF(Context, Op.Rn, 0, Op.Size);
+ if (AOptimizations.UseSse2)
+ {
+ if (Op.Size == 1 && Op.Opc == 0)
+ {
+ //Double -> Single.
+ AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleZero));
+
+ EmitLdvecWithCastToDouble(Context, Op.Rn);
+
+ Type[] Types = new Type[] { typeof(Vector128<float>), typeof(Vector128<double>) };
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Single), Types));
+
+ Context.EmitStvec(Op.Rd);
+ }
+ else if (Op.Size == 0 && Op.Opc == 1)
+ {
+ //Single -> Double.
+ AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleZero));
+
+ Context.EmitLdvec(Op.Rn);
+
+ Type[] Types = new Type[] { typeof(Vector128<double>), typeof(Vector128<float>) };
- EmitFloatCast(Context, Op.Opc);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ConvertScalarToVector128Double), Types));
- EmitScalarSetF(Context, Op.Rd, Op.Opc);
+ EmitStvecWithCastFromDouble(Context, Op.Rd);
+ }
+ else
+ {
+ //Invalid encoding.
+ throw new InvalidOperationException();
+ }
+ }
+ else
+ {
+ EmitVectorExtractF(Context, Op.Rn, 0, Op.Size);
+
+ EmitFloatCast(Context, Op.Opc);
+
+ EmitScalarSetF(Context, Op.Rd, Op.Opc);
+ }
}
public static void Fcvtas_Gp(AILEmitterCtx Context)
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index 171de43b..381fc46a 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -4,7 +4,6 @@ using ChocolArm64.Translation;
using System;
using System.Reflection;
using System.Reflection.Emit;
-using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
@@ -12,6 +11,38 @@ namespace ChocolArm64.Instruction
{
static class AInstEmitSimdHelper
{
+ public static readonly Type[] IntTypesPerSizeLog2 = new Type[]
+ {
+ typeof(sbyte),
+ typeof(short),
+ typeof(int),
+ typeof(long)
+ };
+
+ public static readonly Type[] UIntTypesPerSizeLog2 = new Type[]
+ {
+ typeof(byte),
+ typeof(ushort),
+ typeof(uint),
+ typeof(ulong)
+ };
+
+ public static readonly Type[] VectorIntTypesPerSizeLog2 = new Type[]
+ {
+ typeof(Vector128<sbyte>),
+ typeof(Vector128<short>),
+ typeof(Vector128<int>),
+ typeof(Vector128<long>)
+ };
+
+ public static readonly Type[] VectorUIntTypesPerSizeLog2 = new Type[]
+ {
+ typeof(Vector128<byte>),
+ typeof(Vector128<ushort>),
+ typeof(Vector128<uint>),
+ typeof(Vector128<ulong>)
+ };
+
[Flags]
public enum OperFlags
{
@@ -36,91 +67,133 @@ namespace ChocolArm64.Instruction
return (8 << (Op.Size + 1)) - Op.Imm;
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void EmitSse2Call(AILEmitterCtx Context, string Name)
+ public static void EmitSse2Op(AILEmitterCtx Context, string Name)
{
- EmitSseCall(Context, Name, typeof(Sse2));
+ EmitSseOp(Context, Name, typeof(Sse2));
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void EmitSse41Call(AILEmitterCtx Context, string Name)
+ public static void EmitSse41Op(AILEmitterCtx Context, string Name)
{
- EmitSseCall(Context, Name, typeof(Sse41));
+ EmitSseOp(Context, Name, typeof(Sse41));
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static void EmitSse42Call(AILEmitterCtx Context, string Name)
+ public static void EmitSse42Op(AILEmitterCtx Context, string Name)
{
- EmitSseCall(Context, Name, typeof(Sse42));
+ EmitSseOp(Context, Name, typeof(Sse42));
}
- private static void EmitSseCall(AILEmitterCtx Context, string Name, Type Type)
+ private static void EmitSseOp(AILEmitterCtx Context, string Name, Type Type)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
- void Ldvec(int Reg)
+ EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size);
+
+ Type BaseType = VectorIntTypesPerSizeLog2[Op.Size];
+
+ if (Op is AOpCodeSimdReg BinOp)
{
- Context.EmitLdvec(Reg);
+ EmitLdvecWithSignedCast(Context, BinOp.Rm, Op.Size);
- switch (Op.Size)
- {
- case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToSByte)); break;
- case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt16)); break;
- case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt32)); break;
- case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt64)); break;
- }
+ Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType, BaseType }));
+ }
+ else
+ {
+ Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType }));
}
- Ldvec(Op.Rn);
-
- Type BaseType = null;
+ EmitStvecWithSignedCast(Context, Op.Rd, Op.Size);
- switch (Op.Size)
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
{
- case 0: BaseType = typeof(Vector128<sbyte>); break;
- case 1: BaseType = typeof(Vector128<short>); break;
- case 2: BaseType = typeof(Vector128<int>); break;
- case 3: BaseType = typeof(Vector128<long>); break;
+ EmitVectorZeroUpper(Context, Op.Rd);
}
+ }
- if (Op is AOpCodeSimdReg BinOp)
+ public static void EmitLdvecWithSignedCast(AILEmitterCtx Context, int Reg, int Size)
+ {
+ Context.EmitLdvec(Reg);
+
+ switch (Size)
{
- Ldvec(BinOp.Rm);
+ case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToSByte)); break;
+ case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt16)); break;
+ case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt32)); break;
+ case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToInt64)); break;
- Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType, BaseType }));
+ default: throw new ArgumentOutOfRangeException(nameof(Size));
}
- else
+ }
+
+ public static void EmitLdvecWithCastToDouble(AILEmitterCtx Context, int Reg)
+ {
+ Context.EmitLdvec(Reg);
+
+ AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToDouble));
+ }
+
+ public static void EmitStvecWithCastFromDouble(AILEmitterCtx Context, int Reg)
+ {
+ AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorDoubleToSingle));
+
+ Context.EmitStvec(Reg);
+ }
+
+ public static void EmitLdvecWithUnsignedCast(AILEmitterCtx Context, int Reg, int Size)
+ {
+ Context.EmitLdvec(Reg);
+
+ switch (Size)
{
- Context.EmitCall(Type.GetMethod(Name, new Type[] { BaseType }));
+ case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToByte)); break;
+ case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToUInt16)); break;
+ case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToUInt32)); break;
+ case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleToUInt64)); break;
+
+ default: throw new ArgumentOutOfRangeException(nameof(Size));
}
+ }
- switch (Op.Size)
+ public static void EmitStvecWithSignedCast(AILEmitterCtx Context, int Reg, int Size)
+ {
+ switch (Size)
{
case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSByteToSingle)); break;
case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt16ToSingle)); break;
case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt32ToSingle)); break;
case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt64ToSingle)); break;
+
+ default: throw new ArgumentOutOfRangeException(nameof(Size));
}
- Context.EmitStvec(Op.Rd);
+ Context.EmitStvec(Reg);
+ }
- if (Op.RegisterSize == ARegisterSize.SIMD64)
+ public static void EmitStvecWithUnsignedCast(AILEmitterCtx Context, int Reg, int Size)
+ {
+ switch (Size)
{
- EmitVectorZeroUpper(Context, Op.Rd);
+ case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorByteToSingle)); break;
+ case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorUInt16ToSingle)); break;
+ case 2: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorUInt32ToSingle)); break;
+ case 3: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorUInt64ToSingle)); break;
+
+ default: throw new ArgumentOutOfRangeException(nameof(Size));
}
+
+ Context.EmitStvec(Reg);
}
- public static void EmitScalarSseOrSse2CallF(AILEmitterCtx Context, string Name)
+ public static void EmitScalarSseOrSse2OpF(AILEmitterCtx Context, string Name)
{
- EmitSseOrSse2CallF(Context, Name, true);
+ EmitSseOrSse2OpF(Context, Name, true);
}
- public static void EmitVectorSseOrSse2CallF(AILEmitterCtx Context, string Name)
+ public static void EmitVectorSseOrSse2OpF(AILEmitterCtx Context, string Name)
{
- EmitSseOrSse2CallF(Context, Name, false);
+ EmitSseOrSse2OpF(Context, Name, false);
}
- public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name, bool Scalar)
+ public static void EmitSseOrSse2OpF(AILEmitterCtx Context, string Name, bool Scalar)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
@@ -1183,8 +1256,21 @@ namespace ChocolArm64.Instruction
public static void EmitScalarSetF(AILEmitterCtx Context, int Reg, int Size)
{
- EmitVectorZeroAll(Context, Reg);
- EmitVectorInsertF(Context, Reg, 0, Size);
+ if (AOptimizations.UseSse41 && Size == 0)
+ {
+ //If the type is float, we can perform insertion and
+ //zero the upper bits with a single instruction (INSERTPS).
+ Context.EmitLdvec(Reg);
+
+ AVectorHelper.EmitCall(Context, nameof(AVectorHelper.Sse41VectorInsertScalarSingle));
+
+ Context.EmitStvec(Reg);
+ }
+ else
+ {
+ EmitVectorZeroAll(Context, Reg);
+ EmitVectorInsertF(Context, Reg, 0, Size);
+ }
}
public static void EmitVectorExtractSx(AILEmitterCtx Context, int Reg, int Index, int Size)
@@ -1235,8 +1321,17 @@ namespace ChocolArm64.Instruction
public static void EmitVectorZeroAll(AILEmitterCtx Context, int Rd)
{
- EmitVectorZeroLower(Context, Rd);
- EmitVectorZeroUpper(Context, Rd);
+ if (AOptimizations.UseSse2)
+ {
+ AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorSingleZero));
+
+ Context.EmitStvec(Rd);
+ }
+ else
+ {
+ EmitVectorZeroLower(Context, Rd);
+ EmitVectorZeroUpper(Context, Rd);
+ }
}
public static void EmitVectorZeroLower(AILEmitterCtx Context, int Rd)
@@ -1249,9 +1344,32 @@ namespace ChocolArm64.Instruction
EmitVectorInsertTmp(Context, 0, 3, 0);
}
- public static void EmitVectorZeroUpper(AILEmitterCtx Context, int Rd)
+ public static void EmitVectorZeroUpper(AILEmitterCtx Context, int Reg)
{
- EmitVectorInsert(Context, Rd, 1, 3, 0);
+ if (AOptimizations.UseSse2)
+ {
+ //TODO: Use MoveScalar once it is fixed, as of the
+ //time of writing it just crashes the JIT.
+ EmitLdvecWithUnsignedCast(Context, Reg, 3);
+
+ Type[] Types = new Type[] { typeof(Vector128<ulong>), typeof(byte) };
+
+ //Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.MoveScalar), Types));
+
+ Context.EmitLdc_I4(8);
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical128BitLane), Types));
+
+ Context.EmitLdc_I4(8);
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), Types));
+
+ EmitStvecWithUnsignedCast(Context, Reg, 3);
+ }
+ else
+ {
+ EmitVectorInsert(Context, Reg, 1, 3, 0);
+ }
}
public static void EmitVectorZero32_128(AILEmitterCtx Context, int Reg)
diff --git a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
index 9f5af96c..1aa8981f 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdLogical.cs
@@ -15,7 +15,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse2)
{
- EmitSse2Call(Context, nameof(Sse2.And));
+ EmitSse2Op(Context, nameof(Sse2.And));
}
else
{
@@ -25,11 +25,36 @@ namespace ChocolArm64.Instruction
public static void Bic_V(AILEmitterCtx Context)
{
- EmitVectorBinaryOpZx(Context, () =>
+ if (AOptimizations.UseSse2)
{
- Context.Emit(OpCodes.Not);
- Context.Emit(OpCodes.And);
- });
+ AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
+
+ EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size);
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
+
+ Type[] Types = new Type[]
+ {
+ VectorUIntTypesPerSizeLog2[Op.Size],
+ VectorUIntTypesPerSizeLog2[Op.Size]
+ };
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.AndNot), Types));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorBinaryOpZx(Context, () =>
+ {
+ Context.Emit(OpCodes.Not);
+ Context.Emit(OpCodes.And);
+ });
+ }
}
public static void Bic_Vi(AILEmitterCtx Context)
@@ -55,59 +80,124 @@ namespace ChocolArm64.Instruction
{
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
- int Bytes = Op.GetBitsCount() >> 3;
- int Elems = Bytes >> Op.Size;
-
- for (int Index = 0; Index < Elems; Index++)
+ if (AOptimizations.UseSse2)
{
- EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size);
- EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
+ Type[] Types = new Type[]
+ {
+ VectorUIntTypesPerSizeLog2[Op.Size],
+ VectorUIntTypesPerSizeLog2[Op.Size]
+ };
+
+ EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size);
+ EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
- Context.Emit(OpCodes.Xor);
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), Types));
- EmitVectorExtractZx(Context, Op.Rm, Index, Op.Size);
+ string Name = NotRm ? nameof(Sse2.AndNot) : nameof(Sse2.And);
- if (NotRm)
+ Context.EmitCall(typeof(Sse2).GetMethod(Name, Types));
+
+ EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), Types));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
{
- Context.Emit(OpCodes.Not);
+ EmitVectorZeroUpper(Context, Op.Rd);
}
+ }
+ else
+ {
+ int Bytes = Op.GetBitsCount() >> 3;
+ int Elems = Bytes >> Op.Size;
- Context.Emit(OpCodes.And);
+ for (int Index = 0; Index < Elems; Index++)
+ {
+ EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size);
+ EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size);
- EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size);
+ Context.Emit(OpCodes.Xor);
- Context.Emit(OpCodes.Xor);
+ EmitVectorExtractZx(Context, Op.Rm, Index, Op.Size);
- EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
- }
+ if (NotRm)
+ {
+ Context.Emit(OpCodes.Not);
+ }
- if (Op.RegisterSize == ARegisterSize.SIMD64)
- {
- EmitVectorZeroUpper(Context, Op.Rd);
+ Context.Emit(OpCodes.And);
+
+ EmitVectorExtractZx(Context, Op.Rd, Index, Op.Size);
+
+ Context.Emit(OpCodes.Xor);
+
+ EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+ }
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
}
}
public static void Bsl_V(AILEmitterCtx Context)
{
- EmitVectorTernaryOpZx(Context, () =>
+ if (AOptimizations.UseSse2)
{
- Context.EmitSttmp();
- Context.EmitLdtmp();
+ AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
- Context.Emit(OpCodes.Xor);
- Context.Emit(OpCodes.And);
+ Type[] Types = new Type[]
+ {
+ VectorUIntTypesPerSizeLog2[Op.Size],
+ VectorUIntTypesPerSizeLog2[Op.Size]
+ };
- Context.EmitLdtmp();
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
+ EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size);
- Context.Emit(OpCodes.Xor);
- });
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), Types));
+
+ EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), Types));
+
+ EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size);
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Xor), Types));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
+ {
+ EmitVectorTernaryOpZx(Context, () =>
+ {
+ Context.EmitSttmp();
+ Context.EmitLdtmp();
+
+ Context.Emit(OpCodes.Xor);
+ Context.Emit(OpCodes.And);
+
+ Context.EmitLdtmp();
+
+ Context.Emit(OpCodes.Xor);
+ });
+ }
}
public static void Eor_V(AILEmitterCtx Context)
{
if (AOptimizations.UseSse2)
{
- EmitSse2Call(Context, nameof(Sse2.Xor));
+ EmitSse2Op(Context, nameof(Sse2.Xor));
}
else
{
@@ -133,7 +223,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse2)
{
- EmitSse2Call(Context, nameof(Sse2.Or));
+ EmitSse2Op(Context, nameof(Sse2.Or));
}
else
{
diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
index 3bf1e463..94097f48 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs
@@ -3,6 +3,7 @@ using ChocolArm64.State;
using ChocolArm64.Translation;
using System;
using System.Reflection.Emit;
+using System.Runtime.Intrinsics.X86;
using static ChocolArm64.Instruction.AInstEmitSimdHelper;
@@ -14,19 +15,44 @@ namespace ChocolArm64.Instruction
{
AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp;
- int Bytes = Op.GetBitsCount() >> 3;
- int Elems = Bytes >> Op.Size;
-
- for (int Index = 0; Index < Elems; Index++)
+ if (AOptimizations.UseSse2)
{
Context.EmitLdintzr(Op.Rn);
- EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
- }
+ switch (Op.Size)
+ {
+ case 0: Context.Emit(OpCodes.Conv_U1); break;
+ case 1: Context.Emit(OpCodes.Conv_U2); break;
+ case 2: Context.Emit(OpCodes.Conv_U4); break;
+ }
- if (Op.RegisterSize == ARegisterSize.SIMD64)
+ Type[] Types = new Type[] { UIntTypesPerSizeLog2[Op.Size] };
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), Types));
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
{
- EmitVectorZeroUpper(Context, Op.Rd);
+ int Bytes = Op.GetBitsCount() >> 3;
+ int Elems = Bytes >> Op.Size;
+
+ for (int Index = 0; Index < Elems; Index++)
+ {
+ Context.EmitLdintzr(Op.Rn);
+
+ EmitVectorInsert(Context, Op.Rd, Index, Op.Size);
+ }
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
}
}
@@ -295,25 +321,91 @@ namespace ChocolArm64.Instruction
int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0;
- if (Part != 0)
+ if (AOptimizations.UseSse41 && Op.Size < 2)
{
- Context.EmitLdvec(Op.Rd);
- Context.EmitStvectmp();
- }
+ void EmitZeroVector()
+ {
+ switch (Op.Size)
+ {
+ case 0: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt16Zero)); break;
+ case 1: AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInt32Zero)); break;
+ }
+ }
- for (int Index = 0; Index < Elems; Index++)
- {
- EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1);
+ //For XTN, first operand is source, second operand is 0.
+ //For XTN2, first operand is 0, second operand is source.
+ if (Part != 0)
+ {
+ EmitZeroVector();
+ }
- EmitVectorInsertTmp(Context, Part + Index, Op.Size);
- }
+ EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size + 1);
- Context.EmitLdvectmp();
- Context.EmitStvec(Op.Rd);
+ //Set mask to discard the upper half of the wide elements.
+ switch (Op.Size)
+ {
+ case 0: Context.EmitLdc_I4(0x00ff); break;
+ case 1: Context.EmitLdc_I4(0x0000ffff); break;
+ }
+
+ Type WideType = IntTypesPerSizeLog2[Op.Size + 1];
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.SetAllVector128), new Type[] { WideType }));
+
+ WideType = VectorIntTypesPerSizeLog2[Op.Size + 1];
+
+ Type[] WideTypes = new Type[] { WideType, WideType };
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.And), WideTypes));
+
+ if (Part == 0)
+ {
+ EmitZeroVector();
+ }
+
+ //Pack values with unsigned saturation, the saturation shouldn't
+ //change anything since the upper bits were masked off.
+ Type SseType = Op.Size == 0 ? typeof(Sse2) : typeof(Sse41);
+
+ Context.EmitCall(SseType.GetMethod(nameof(Sse2.PackUnsignedSaturate), WideTypes));
+
+ if (Part != 0)
+ {
+ //For XTN2, we additionally need to discard the upper bits
+ //of the target register and OR the result with it.
+ EmitVectorZeroUpper(Context, Op.Rd);
- if (Part == 0)
+ EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ Type NarrowType = VectorUIntTypesPerSizeLog2[Op.Size];
+
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Or), new Type[] { NarrowType, NarrowType }));
+ }
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+ }
+ else
{
- EmitVectorZeroUpper(Context, Op.Rd);
+ if (Part != 0)
+ {
+ Context.EmitLdvec(Op.Rd);
+ Context.EmitStvectmp();
+ }
+
+ for (int Index = 0; Index < Elems; Index++)
+ {
+ EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1);
+
+ EmitVectorInsertTmp(Context, Part + Index, Op.Size);
+ }
+
+ Context.EmitLdvectmp();
+ Context.EmitStvec(Op.Rd);
+
+ if (Part == 0)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
}
}
@@ -394,28 +486,64 @@ namespace ChocolArm64.Instruction
{
AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp;
- int Words = Op.GetBitsCount() >> 4;
- int Pairs = Words >> Op.Size;
+ if (AOptimizations.UseSse2)
+ {
+ EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size);
+ EmitLdvecWithUnsignedCast(Context, Op.Rm, Op.Size);
- int Base = Part != 0 ? Pairs : 0;
+ Type[] Types = new Type[]
+ {
+ VectorUIntTypesPerSizeLog2[Op.Size],
+ VectorUIntTypesPerSizeLog2[Op.Size]
+ };
- for (int Index = 0; Index < Pairs; Index++)
- {
- int Idx = Index << 1;
+ string Name = Part == 0 || (Part != 0 && Op.RegisterSize == ARegisterSize.SIMD64)
+ ? nameof(Sse2.UnpackLow)
+ : nameof(Sse2.UnpackHigh);
- EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size);
- EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size);
+ Context.EmitCall(typeof(Sse2).GetMethod(Name, Types));
- EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
- EmitVectorInsertTmp(Context, Idx, Op.Size);
- }
+ if (Op.RegisterSize == ARegisterSize.SIMD64 && Part != 0)
+ {
+ Context.EmitLdc_I4(8);
- Context.EmitLdvectmp();
- Context.EmitStvec(Op.Rd);
+ Type[] ShTypes = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) };
- if (Op.RegisterSize == ARegisterSize.SIMD64)
+ Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical128BitLane), ShTypes));
+ }
+
+ EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64 && Part == 0)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else
{
- EmitVectorZeroUpper(Context, Op.Rd);
+ int Words = Op.GetBitsCount() >> 4;
+ int Pairs = Words >> Op.Size;
+
+ int Base = Part != 0 ? Pairs : 0;
+
+ for (int Index = 0; Index < Pairs; Index++)
+ {
+ int Idx = Index << 1;
+
+ EmitVectorExtractZx(Context, Op.Rn, Base + Index, Op.Size);
+ EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size);
+
+ EmitVectorInsertTmp(Context, Idx + 1, Op.Size);
+ EmitVectorInsertTmp(Context, Idx, Op.Size);
+ }
+
+ Context.EmitLdvectmp();
+ Context.EmitStvec(Op.Rd);
+
+ if (Op.RegisterSize == ARegisterSize.SIMD64)
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
}
}
}
diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs
index 3e4452ab..7f9d98cd 100644
--- a/ChocolArm64/Instruction/AVectorHelper.cs
+++ b/ChocolArm64/Instruction/AVectorHelper.cs
@@ -227,7 +227,16 @@ namespace ChocolArm64.Instruction
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double VectorExtractDouble(Vector128<float> Vector, byte Index)
{
- return BitConverter.Int64BitsToDouble(VectorExtractIntSx(Vector, Index, 3));
+ if (Sse41.IsSupported)
+ {
+ return BitConverter.Int64BitsToDouble(Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index));
+ }
+ else if (Sse2.IsSupported)
+ {
+ return BitConverter.Int64BitsToDouble((long)VectorExtractIntZx(Vector, Index, 3));
+ }
+
+ throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -235,41 +244,49 @@ namespace ChocolArm64.Instruction
{
if (Sse41.IsSupported)
{
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
-
- case 1:
- return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
-
- case 2:
- return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);
-
- case 3:
- return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
+ return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
+ }
+ else if (Size == 1)
+ {
+ return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
+ }
+ else if (Size == 2)
+ {
+ return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);
+ }
+ else if (Size == 3)
+ {
+ return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return (sbyte)VectorExtractIntZx(Vector, Index, Size);
-
- case 1:
- return (short)VectorExtractIntZx(Vector, Index, Size);
-
- case 2:
- return (int)VectorExtractIntZx(Vector, Index, Size);
-
- case 3:
- return (long)VectorExtractIntZx(Vector, Index, Size);
+ return (sbyte)VectorExtractIntZx(Vector, Index, Size);
+ }
+ else if (Size == 1)
+ {
+ return (short)VectorExtractIntZx(Vector, Index, Size);
+ }
+ else if (Size == 2)
+ {
+ return (int)VectorExtractIntZx(Vector, Index, Size);
+ }
+ else if (Size == 3)
+ {
+ return (long)VectorExtractIntZx(Vector, Index, Size);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
@@ -280,22 +297,26 @@ namespace ChocolArm64.Instruction
{
if (Sse41.IsSupported)
{
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
-
- case 1:
- return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
-
- case 2:
- return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);
-
- case 3:
- return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
+ return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
+ }
+ else if (Size == 1)
+ {
+ return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
+ }
+ else if (Size == 2)
+ {
+ return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);
+ }
+ else if (Size == 3)
+ {
+ return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
@@ -305,35 +326,35 @@ namespace ChocolArm64.Instruction
ushort Value = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return (byte)(Value >> (Index & 1) * 8);
-
- case 1:
- return Value;
+ return (byte)(Value >> (Index & 1) * 8);
+ }
+ else if (Size == 1)
+ {
+ return Value;
+ }
+ else if (Size == 2 || Size == 3)
+ {
+ ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1));
- case 2:
- case 3:
+ if (Size == 2)
{
- ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1));
-
- if (Size == 2)
- {
- return (uint)(Value | (Value1 << 16));
- }
+ return (uint)(Value | (Value1 << 16));
+ }
- ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2));
- ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3));
+ ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2));
+ ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3));
- return ((ulong)Value << 0) |
- ((ulong)Value1 << 16) |
- ((ulong)Value2 << 32) |
- ((ulong)Value3 << 48);
- }
+ return ((ulong)Value << 0) |
+ ((ulong)Value1 << 16) |
+ ((ulong)Value2 << 32) |
+ ((ulong)Value3 << 48);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
@@ -370,22 +391,26 @@ namespace ChocolArm64.Instruction
{
if (Sse41.IsSupported)
{
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));
-
- case 1:
- return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
-
- case 2:
- return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));
-
- case 3:
- return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
+ return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));
+ }
+ else if (Size == 1)
+ {
+ return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
+ }
+ else if (Size == 2)
+ {
+ return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));
+ }
+ else if (Size == 3)
+ {
+ return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
@@ -395,41 +420,39 @@ namespace ChocolArm64.Instruction
? Index >> 1
: Index << (Size - 1);
- switch (Size)
+ if (Size == 0)
{
- case 0:
- {
- ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
+ ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
- int Shift = (Index & 1) * 8;
+ int Shift = (Index & 1) * 8;
- ShortVal &= (ushort)(0xff00 >> Shift);
+ ShortVal &= (ushort)(0xff00 >> Shift);
- ShortVal |= (ushort)((byte)Value << Shift);
+ ShortVal |= (ushort)((byte)Value << Shift);
- return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
- }
-
- case 1:
- return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
+ return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
+ }
+ else if (Size == 1)
+ {
+ return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
+ }
+ else if (Size == 2 || Size == 3)
+ {
+ ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0));
+ ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));
- case 2:
- case 3:
+ if (Size == 3)
{
- ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0));
- ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));
-
- if (Size == 3)
- {
- ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
- ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
- }
-
- return Sse.StaticCast<ushort, float>(ShortVector);
+ ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
+ ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
}
- }
- throw new ArgumentOutOfRangeException(nameof(Size));
+ return Sse.StaticCast<ushort, float>(ShortVector);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
+ }
}
throw new PlatformNotSupportedException();
@@ -440,7 +463,29 @@ namespace ChocolArm64.Instruction
{
if (Sse41.IsSupported)
{
- return Sse41.Insert(Vector, Value, (byte)(Index << 4));
+ //Note: The if/else if is necessary to enable the JIT to
+ //produce a single INSERTPS instruction instead of the
+ //jump table fallback.
+ if (Index == 0)
+ {
+ return Sse41.Insert(Vector, Value, 0x00);
+ }
+ else if (Index == 1)
+ {
+ return Sse41.Insert(Vector, Value, 0x10);
+ }
+ else if (Index == 2)
+ {
+ return Sse41.Insert(Vector, Value, 0x20);
+ }
+ else if (Index == 3)
+ {
+ return Sse41.Insert(Vector, Value, 0x30);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Index));
+ }
}
else if (Sse2.IsSupported)
{
@@ -461,6 +506,79 @@ namespace ChocolArm64.Instruction
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> Sse41VectorInsertScalarSingle(float Value, Vector128<float> Vector)
+ {
+ //Note: 0b1110 is the mask to zero the upper bits.
+ return Sse41.Insert(Vector, Value, 0b1110);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<sbyte> VectorSByteZero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<sbyte>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<short> VectorInt16Zero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<short>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<int> VectorInt32Zero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<int>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<long> VectorInt64Zero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<long>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorSingleZero()
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.SetZeroVector128();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<double> VectorDoubleZero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<double>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorZero32_128(Vector128<float> Vector)
{
if (Sse.IsSupported)
@@ -516,6 +634,50 @@ namespace ChocolArm64.Instruction
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<byte> VectorSingleToByte(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<float, byte>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<ushort> VectorSingleToUInt16(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<float, ushort>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<uint> VectorSingleToUInt32(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<float, uint>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<ulong> VectorSingleToUInt64(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<float, ulong>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector)
{
if (Sse.IsSupported)
@@ -571,6 +733,50 @@ namespace ChocolArm64.Instruction
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorByteToSingle(Vector128<byte> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<byte, float>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorUInt16ToSingle(Vector128<ushort> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<ushort, float>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorUInt32ToSingle(Vector128<uint> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<uint, float>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorUInt64ToSingle(Vector128<ulong> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<ulong, float>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector)
{
if (Sse.IsSupported)
diff --git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs
index 2cb9b16c..bb6a2b54 100644
--- a/ChocolArm64/Memory/AMemory.cs
+++ b/ChocolArm64/Memory/AMemory.cs
@@ -232,7 +232,7 @@ namespace ChocolArm64.Memory
}
}
- [MethodImpl(MethodImplOptions.NoInlining)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public Vector128<float> ReadVector32(long Position)
{
if (Sse.IsSupported)
@@ -245,7 +245,7 @@ namespace ChocolArm64.Memory
}
}
- [MethodImpl(MethodImplOptions.NoInlining)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public Vector128<float> ReadVector64(long Position)
{
if (Sse2.IsSupported)
@@ -365,7 +365,7 @@ namespace ChocolArm64.Memory
}
}
- [MethodImpl(MethodImplOptions.NoInlining)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public void WriteVector32(long Position, Vector128<float> Value)
{
if (Sse.IsSupported)
@@ -378,7 +378,7 @@ namespace ChocolArm64.Memory
}
}
- [MethodImpl(MethodImplOptions.NoInlining)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public void WriteVector64(long Position, Vector128<float> Value)
{
if (Sse2.IsSupported)