about | summary | refs | log | tree | commit | diff
path: root/ChocolArm64/Instruction/AVectorHelper.cs
diff options
context:
space:
mode:
author gdkchan <gab.dark.100@gmail.com> 2018-09-26 23:30:21 -0300
committer GitHub <noreply@github.com> 2018-09-26 23:30:21 -0300
commit 0b52ee66272b673cecebcf9ae9baaf03899e0ee3 (patch)
tree a004a0f7215e4c371ee99c187c291a0e11a0365e /ChocolArm64/Instruction/AVectorHelper.cs
parent 40282da93a45c90b3d5a696199ee353a1ae8c730 (diff)
Optimize BIC, BSL, BIT, BIF, XTN, ZIP, DUP (Gp), FMADD (Scalar) and FCVT (Scalar) using SSE intrinsics (#405)
* Optimize BIC, BSL, BIT, BIF, XTN, ZIP, DUP (Gp), FMADD (Scalar) and FCVT (Scalar) using SSE intrinsics, some CQ improvements
* Remove useless space
* Address PR feedback
* Revert EmitVectorZero32_128 changes
Diffstat (limited to 'ChocolArm64/Instruction/AVectorHelper.cs')
-rw-r--r-- ChocolArm64/Instruction/AVectorHelper.cs 420
1 files changed, 313 insertions, 107 deletions
diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs
index 3e4452ab..7f9d98cd 100644
--- a/ChocolArm64/Instruction/AVectorHelper.cs
+++ b/ChocolArm64/Instruction/AVectorHelper.cs
@@ -227,7 +227,16 @@ namespace ChocolArm64.Instruction
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static double VectorExtractDouble(Vector128<float> Vector, byte Index)
{
- return BitConverter.Int64BitsToDouble(VectorExtractIntSx(Vector, Index, 3));
+ if (Sse41.IsSupported)
+ {
+ return BitConverter.Int64BitsToDouble(Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index));
+ }
+ else if (Sse2.IsSupported)
+ {
+ return BitConverter.Int64BitsToDouble((long)VectorExtractIntZx(Vector, Index, 3));
+ }
+
+ throw new PlatformNotSupportedException();
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -235,41 +244,49 @@ namespace ChocolArm64.Instruction
{
if (Sse41.IsSupported)
{
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
-
- case 1:
- return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
-
- case 2:
- return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);
-
- case 3:
- return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
+ return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
+ }
+ else if (Size == 1)
+ {
+ return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
+ }
+ else if (Size == 2)
+ {
+ return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index);
+ }
+ else if (Size == 3)
+ {
+ return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return (sbyte)VectorExtractIntZx(Vector, Index, Size);
-
- case 1:
- return (short)VectorExtractIntZx(Vector, Index, Size);
-
- case 2:
- return (int)VectorExtractIntZx(Vector, Index, Size);
-
- case 3:
- return (long)VectorExtractIntZx(Vector, Index, Size);
+ return (sbyte)VectorExtractIntZx(Vector, Index, Size);
+ }
+ else if (Size == 1)
+ {
+ return (short)VectorExtractIntZx(Vector, Index, Size);
+ }
+ else if (Size == 2)
+ {
+ return (int)VectorExtractIntZx(Vector, Index, Size);
+ }
+ else if (Size == 3)
+ {
+ return (long)VectorExtractIntZx(Vector, Index, Size);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
@@ -280,22 +297,26 @@ namespace ChocolArm64.Instruction
{
if (Sse41.IsSupported)
{
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
-
- case 1:
- return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
-
- case 2:
- return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);
-
- case 3:
- return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
+ return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index);
+ }
+ else if (Size == 1)
+ {
+ return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index);
+ }
+ else if (Size == 2)
+ {
+ return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index);
+ }
+ else if (Size == 3)
+ {
+ return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
@@ -305,35 +326,35 @@ namespace ChocolArm64.Instruction
ushort Value = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return (byte)(Value >> (Index & 1) * 8);
-
- case 1:
- return Value;
+ return (byte)(Value >> (Index & 1) * 8);
+ }
+ else if (Size == 1)
+ {
+ return Value;
+ }
+ else if (Size == 2 || Size == 3)
+ {
+ ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1));
- case 2:
- case 3:
+ if (Size == 2)
{
- ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1));
-
- if (Size == 2)
- {
- return (uint)(Value | (Value1 << 16));
- }
+ return (uint)(Value | (Value1 << 16));
+ }
- ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2));
- ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3));
+ ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2));
+ ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3));
- return ((ulong)Value << 0) |
- ((ulong)Value1 << 16) |
- ((ulong)Value2 << 32) |
- ((ulong)Value3 << 48);
- }
+ return ((ulong)Value << 0) |
+ ((ulong)Value1 << 16) |
+ ((ulong)Value2 << 32) |
+ ((ulong)Value3 << 48);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
throw new PlatformNotSupportedException();
@@ -370,22 +391,26 @@ namespace ChocolArm64.Instruction
{
if (Sse41.IsSupported)
{
- switch (Size)
+ if (Size == 0)
{
- case 0:
- return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));
-
- case 1:
- return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
-
- case 2:
- return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));
-
- case 3:
- return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
+ return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index));
+ }
+ else if (Size == 1)
+ {
+ return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
+ }
+ else if (Size == 2)
+ {
+ return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index));
+ }
+ else if (Size == 3)
+ {
+ return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index));
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
}
-
- throw new ArgumentOutOfRangeException(nameof(Size));
}
else if (Sse2.IsSupported)
{
@@ -395,41 +420,39 @@ namespace ChocolArm64.Instruction
? Index >> 1
: Index << (Size - 1);
- switch (Size)
+ if (Size == 0)
{
- case 0:
- {
- ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
+ ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx);
- int Shift = (Index & 1) * 8;
+ int Shift = (Index & 1) * 8;
- ShortVal &= (ushort)(0xff00 >> Shift);
+ ShortVal &= (ushort)(0xff00 >> Shift);
- ShortVal |= (ushort)((byte)Value << Shift);
+ ShortVal |= (ushort)((byte)Value << Shift);
- return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
- }
-
- case 1:
- return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
+ return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx));
+ }
+ else if (Size == 1)
+ {
+ return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index));
+ }
+ else if (Size == 2 || Size == 3)
+ {
+ ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0));
+ ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));
- case 2:
- case 3:
+ if (Size == 3)
{
- ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0));
- ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1));
-
- if (Size == 3)
- {
- ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
- ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
- }
-
- return Sse.StaticCast<ushort, float>(ShortVector);
+ ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2));
+ ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3));
}
- }
- throw new ArgumentOutOfRangeException(nameof(Size));
+ return Sse.StaticCast<ushort, float>(ShortVector);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Size));
+ }
}
throw new PlatformNotSupportedException();
@@ -440,7 +463,29 @@ namespace ChocolArm64.Instruction
{
if (Sse41.IsSupported)
{
- return Sse41.Insert(Vector, Value, (byte)(Index << 4));
+ //Note: The if/else if is necessary to enable the JIT to
+ //produce a single INSERTPS instruction instead of the
+ //jump table fallback.
+ if (Index == 0)
+ {
+ return Sse41.Insert(Vector, Value, 0x00);
+ }
+ else if (Index == 1)
+ {
+ return Sse41.Insert(Vector, Value, 0x10);
+ }
+ else if (Index == 2)
+ {
+ return Sse41.Insert(Vector, Value, 0x20);
+ }
+ else if (Index == 3)
+ {
+ return Sse41.Insert(Vector, Value, 0x30);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(Index));
+ }
}
else if (Sse2.IsSupported)
{
@@ -461,6 +506,79 @@ namespace ChocolArm64.Instruction
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> Sse41VectorInsertScalarSingle(float Value, Vector128<float> Vector)
+ {
+ //Note: 0b1110 is the mask to zero the upper bits.
+ return Sse41.Insert(Vector, Value, 0b1110);
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<sbyte> VectorSByteZero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<sbyte>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<short> VectorInt16Zero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<short>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<int> VectorInt32Zero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<int>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<long> VectorInt64Zero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<long>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorSingleZero()
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.SetZeroVector128();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<double> VectorDoubleZero()
+ {
+ if (Sse2.IsSupported)
+ {
+ return Sse2.SetZeroVector128<double>();
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorZero32_128(Vector128<float> Vector)
{
if (Sse.IsSupported)
@@ -516,6 +634,50 @@ namespace ChocolArm64.Instruction
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<byte> VectorSingleToByte(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<float, byte>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<ushort> VectorSingleToUInt16(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<float, ushort>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<uint> VectorSingleToUInt32(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<float, uint>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<ulong> VectorSingleToUInt64(Vector128<float> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<float, ulong>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector)
{
if (Sse.IsSupported)
@@ -571,6 +733,50 @@ namespace ChocolArm64.Instruction
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorByteToSingle(Vector128<byte> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<byte, float>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorUInt16ToSingle(Vector128<ushort> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<ushort, float>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorUInt32ToSingle(Vector128<uint> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<uint, float>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static Vector128<float> VectorUInt64ToSingle(Vector128<ulong> Vector)
+ {
+ if (Sse.IsSupported)
+ {
+ return Sse.StaticCast<ulong, float>(Vector);
+ }
+
+ throw new PlatformNotSupportedException();
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector)
{
if (Sse.IsSupported)