about summary refs log tree commit diff
diff options
context:
space:
mode:
author gdkchan <gab.dark.100@gmail.com> 2018-08-14 23:54:12 -0300
committer GitHub <noreply@github.com> 2018-08-14 23:54:12 -0300
commit 55374ebba0ed49bc4624e47cc971b1e63f644583 (patch)
tree 1a9f0df633dcd3c8db80f419849bbfdf2a29e395
parent 0673dc183a03f58ff558e85054db456e83184df7 (diff)
Zero out bits 63:32 of scalar float operations with SSE intrinsics (#273)
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs 16
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdCmp.cs 12
-rw-r--r-- ChocolArm64/Instruction/AInstEmitSimdHelper.cs 34
-rw-r--r-- ChocolArm64/Instruction/AVectorHelper.cs 23
4 files changed, 69 insertions, 16 deletions
diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
index 1d7b16dd..92da9ff9 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs
@@ -305,7 +305,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.AddScalar));
+ EmitScalarSseOrSse2CallF(Context, nameof(Sse.AddScalar));
}
else
{
@@ -317,7 +317,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.Add));
+ EmitVectorSseOrSse2CallF(Context, nameof(Sse.Add));
}
else
{
@@ -375,7 +375,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.DivideScalar));
+ EmitScalarSseOrSse2CallF(Context, nameof(Sse.DivideScalar));
}
else
{
@@ -387,7 +387,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.Divide));
+ EmitVectorSseOrSse2CallF(Context, nameof(Sse.Divide));
}
else
{
@@ -526,7 +526,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar));
+ EmitScalarSseOrSse2CallF(Context, nameof(Sse.MultiplyScalar));
}
else
{
@@ -543,7 +543,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.Multiply));
+ EmitVectorSseOrSse2CallF(Context, nameof(Sse.Multiply));
}
else
{
@@ -910,7 +910,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.SubtractScalar));
+ EmitScalarSseOrSse2CallF(Context, nameof(Sse.SubtractScalar));
}
else
{
@@ -922,7 +922,7 @@ namespace ChocolArm64.Instruction
{
if (AOptimizations.UseSse && AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.Subtract));
+ EmitVectorSseOrSse2CallF(Context, nameof(Sse.Subtract));
}
else
{
diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
index c2d47747..6357396d 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs
@@ -158,7 +158,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar));
+ EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareEqualScalar));
}
else
{
@@ -171,7 +171,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.CompareEqual));
+ EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareEqual));
}
else
{
@@ -184,7 +184,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar));
+ EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqualScalar));
}
else
{
@@ -197,7 +197,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual));
+ EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanOrEqual));
}
else
{
@@ -210,7 +210,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar));
+ EmitScalarSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThanScalar));
}
else
{
@@ -223,7 +223,7 @@ namespace ChocolArm64.Instruction
if (Context.CurrOp is AOpCodeSimdReg && AOptimizations.UseSse
&& AOptimizations.UseSse2)
{
- EmitSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan));
+ EmitVectorSseOrSse2CallF(Context, nameof(Sse.CompareGreaterThan));
}
else
{
diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
index a9af3902..4ecfdae3 100644
--- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
+++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs
@@ -110,7 +110,17 @@ namespace ChocolArm64.Instruction
}
}
- public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name)
+ public static void EmitScalarSseOrSse2CallF(AILEmitterCtx Context, string Name)
+ {
+ EmitSseOrSse2CallF(Context, Name, true);
+ }
+
+ public static void EmitVectorSseOrSse2CallF(AILEmitterCtx Context, string Name)
+ {
+ EmitSseOrSse2CallF(Context, Name, false);
+ }
+
+ public static void EmitSseOrSse2CallF(AILEmitterCtx Context, string Name, bool Scalar)
{
AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp;
@@ -160,7 +170,18 @@ namespace ChocolArm64.Instruction
Context.EmitStvec(Op.Rd);
- if (Op.RegisterSize == ARegisterSize.SIMD64)
+ if (Scalar)
+ {
+ if (SizeF == 0)
+ {
+ EmitVectorZero32_128(Context, Op.Rd);
+ }
+ else /* if (SizeF == 1) */
+ {
+ EmitVectorZeroUpper(Context, Op.Rd);
+ }
+ }
+ else if (Op.RegisterSize == ARegisterSize.SIMD64)
{
EmitVectorZeroUpper(Context, Op.Rd);
}
@@ -1238,6 +1259,15 @@ namespace ChocolArm64.Instruction
EmitVectorInsert(Context, Rd, 1, 3, 0);
}
+ public static void EmitVectorZero32_128(AILEmitterCtx Context, int Reg)
+ {
+ Context.EmitLdvec(Reg);
+
+ AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorZero32_128));
+
+ Context.EmitStvec(Reg);
+ }
+
public static void EmitVectorInsert(AILEmitterCtx Context, int Reg, int Index, int Size)
{
ThrowIfInvalid(Index, Size);
diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs
index b2d53740..3e4452ab 100644
--- a/ChocolArm64/Instruction/AVectorHelper.cs
+++ b/ChocolArm64/Instruction/AVectorHelper.cs
@@ -9,6 +9,18 @@ namespace ChocolArm64.Instruction
{
static class AVectorHelper
{
+ private static readonly Vector128<float> Zero32_128Mask;
+
+ static AVectorHelper()
+ {
+ if (!Sse2.IsSupported)
+ {
+ throw new PlatformNotSupportedException();
+ }
+
+ Zero32_128Mask = Sse.StaticCast<uint, float>(Sse2.SetVector128(0, 0, 0, 0xffffffff));
+ }
+
public static void EmitCall(AILEmitterCtx Context, string Name64, string Name128)
{
bool IsSimd64 = Context.CurrOp.RegisterSize == ARegisterSize.SIMD64;
@@ -449,6 +461,17 @@ namespace ChocolArm64.Instruction
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorZero32_128(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.And(Vector, Zero32_128Mask);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<sbyte> VectorSingleToSByte(Vector128<float> Vector)
{
if (Sse.IsSupported)