diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2018-09-26 23:30:21 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2018-09-26 23:30:21 -0300 |
| commit | 0b52ee66272b673cecebcf9ae9baaf03899e0ee3 (patch) | |
| tree | a004a0f7215e4c371ee99c187c291a0e11a0365e /ChocolArm64/Instruction/AVectorHelper.cs | |
| parent | 40282da93a45c90b3d5a696199ee353a1ae8c730 (diff) | |
Optimize BIC, BSL, BIT, BIF, XTN, ZIP, DUP (Gp), FMADD (Scalar) and FCVT (Scalar) using SSE intrinsics (#405)
* Optimize BIC, BSL, BIT, BIF, XTN, ZIP, DUP (Gp), FMADD (Scalar) and FCVT (Scalar) using SSE intrinsics, some CQ improvements
* Remove useless space
* Address PR feedback
* Revert EmitVectorZero32_128 changes
Diffstat (limited to 'ChocolArm64/Instruction/AVectorHelper.cs')
| -rw-r--r-- | ChocolArm64/Instruction/AVectorHelper.cs | 420 |
1 files changed, 313 insertions, 107 deletions
diff --git a/ChocolArm64/Instruction/AVectorHelper.cs b/ChocolArm64/Instruction/AVectorHelper.cs index 3e4452ab..7f9d98cd 100644 --- a/ChocolArm64/Instruction/AVectorHelper.cs +++ b/ChocolArm64/Instruction/AVectorHelper.cs @@ -227,7 +227,16 @@ namespace ChocolArm64.Instruction [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double VectorExtractDouble(Vector128<float> Vector, byte Index) { - return BitConverter.Int64BitsToDouble(VectorExtractIntSx(Vector, Index, 3)); + if (Sse41.IsSupported) + { + return BitConverter.Int64BitsToDouble(Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index)); + } + else if (Sse2.IsSupported) + { + return BitConverter.Int64BitsToDouble((long)VectorExtractIntZx(Vector, Index, 3)); + } + + throw new PlatformNotSupportedException(); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -235,41 +244,49 @@ namespace ChocolArm64.Instruction { if (Sse41.IsSupported) { - switch (Size) + if (Size == 0) { - case 0: - return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index); - - case 1: - return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index); - - case 2: - return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index); - - case 3: - return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index); + return (sbyte)Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index); + } + else if (Size == 1) + { + return (short)Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index); + } + else if (Size == 2) + { + return Sse41.Extract(Sse.StaticCast<float, int>(Vector), Index); + } + else if (Size == 3) + { + return Sse41.Extract(Sse.StaticCast<float, long>(Vector), Index); + } + else + { + throw new ArgumentOutOfRangeException(nameof(Size)); } - - throw new ArgumentOutOfRangeException(nameof(Size)); } else if (Sse2.IsSupported) { - switch (Size) + if (Size == 0) { - case 0: - return (sbyte)VectorExtractIntZx(Vector, Index, Size); - - case 1: - return (short)VectorExtractIntZx(Vector, Index, Size); - - case 2: - return (int)VectorExtractIntZx(Vector, Index, Size); - - case 3: - return (long)VectorExtractIntZx(Vector, Index, Size); + return (sbyte)VectorExtractIntZx(Vector, Index, Size); + } + else if (Size == 1) + { + return (short)VectorExtractIntZx(Vector, Index, Size); + } + else if (Size == 2) + { + return (int)VectorExtractIntZx(Vector, Index, Size); + } + else if (Size == 3) + { + return (long)VectorExtractIntZx(Vector, Index, Size); + } + else + { + throw new ArgumentOutOfRangeException(nameof(Size)); } - - throw new ArgumentOutOfRangeException(nameof(Size)); } throw new PlatformNotSupportedException(); @@ -280,22 +297,26 @@ namespace ChocolArm64.Instruction { if (Sse41.IsSupported) { - switch (Size) + if (Size == 0) { - case 0: - return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index); - - case 1: - return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index); - - case 2: - return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index); - - case 3: - return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index); + return Sse41.Extract(Sse.StaticCast<float, byte>(Vector), Index); + } + else if (Size == 1) + { + return Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), Index); + } + else if (Size == 2) + { + return Sse41.Extract(Sse.StaticCast<float, uint>(Vector), Index); + } + else if (Size == 3) + { + return Sse41.Extract(Sse.StaticCast<float, ulong>(Vector), Index); + } + else + { + throw new ArgumentOutOfRangeException(nameof(Size)); } - - throw new ArgumentOutOfRangeException(nameof(Size)); } else if (Sse2.IsSupported) { @@ -305,35 +326,35 @@ namespace ChocolArm64.Instruction ushort Value = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx); - switch (Size) + if (Size == 0) { - case 0: - return (byte)(Value >> (Index & 1) * 8); - - case 1: - return Value; + return (byte)(Value >> (Index & 1) * 8); + } + else if (Size == 1) + { + return Value; + } + else if (Size == 2 || Size == 3) + { + ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1)); - case 2: - case 3: + if (Size == 2) { - ushort Value1 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 1)); - - if (Size == 2) - { - return (uint)(Value | (Value1 << 16)); - } + return (uint)(Value | (Value1 << 16)); + } - ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2)); - ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3)); + ushort Value2 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 2)); + ushort Value3 = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)(ShortIdx + 3)); - return ((ulong)Value << 0) | - ((ulong)Value1 << 16) | - ((ulong)Value2 << 32) | - ((ulong)Value3 << 48); - } + return ((ulong)Value << 0) | + ((ulong)Value1 << 16) | + ((ulong)Value2 << 32) | + ((ulong)Value3 << 48); + } + else + { + throw new ArgumentOutOfRangeException(nameof(Size)); } - - throw new ArgumentOutOfRangeException(nameof(Size)); } throw new PlatformNotSupportedException(); @@ -370,22 +391,26 @@ namespace ChocolArm64.Instruction { if (Sse41.IsSupported) { - switch (Size) + if (Size == 0) { - case 0: - return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index)); - - case 1: - return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index)); - - case 2: - return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index)); - - case 3: - return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index)); + return Sse.StaticCast<byte, float>(Sse41.Insert(Sse.StaticCast<float, byte>(Vector), (byte)Value, Index)); + } + else if (Size == 1) + { + return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index)); + } + else if (Size == 2) + { + return Sse.StaticCast<uint, float>(Sse41.Insert(Sse.StaticCast<float, uint>(Vector), (uint)Value, Index)); + } + else if (Size == 3) + { + return Sse.StaticCast<ulong, float>(Sse41.Insert(Sse.StaticCast<float, ulong>(Vector), Value, Index)); + } + else + { + throw new ArgumentOutOfRangeException(nameof(Size)); } - - throw new ArgumentOutOfRangeException(nameof(Size)); } else if (Sse2.IsSupported) { @@ -395,41 +420,39 @@ namespace ChocolArm64.Instruction ? Index >> 1 : Index << (Size - 1); - switch (Size) + if (Size == 0) { - case 0: - { - ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx); + ushort ShortVal = Sse2.Extract(Sse.StaticCast<float, ushort>(Vector), (byte)ShortIdx); - int Shift = (Index & 1) * 8; + int Shift = (Index & 1) * 8; - ShortVal &= (ushort)(0xff00 >> Shift); + ShortVal &= (ushort)(0xff00 >> Shift); - ShortVal |= (ushort)((byte)Value << Shift); + ShortVal |= (ushort)((byte)Value << Shift); - return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx)); - } - - case 1: - return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index)); + return Sse.StaticCast<ushort, float>(Sse2.Insert(ShortVector, ShortVal, (byte)ShortIdx)); + } + else if (Size == 1) + { + return Sse.StaticCast<ushort, float>(Sse2.Insert(Sse.StaticCast<float, ushort>(Vector), (ushort)Value, Index)); + } + else if (Size == 2 || Size == 3) + { + ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0)); + ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1)); - case 2: - case 3: + if (Size == 3) { - ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 0), (byte)(ShortIdx + 0)); - ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 16), (byte)(ShortIdx + 1)); - - if (Size == 3) - { - ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2)); - ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3)); - } - - return Sse.StaticCast<ushort, float>(ShortVector); + ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 32), (byte)(ShortIdx + 2)); + ShortVector = Sse2.Insert(ShortVector, (ushort)(Value >> 48), (byte)(ShortIdx + 3)); } - } - throw new ArgumentOutOfRangeException(nameof(Size)); + return Sse.StaticCast<ushort, float>(ShortVector); + } + else + { + throw new ArgumentOutOfRangeException(nameof(Size)); + } } throw new PlatformNotSupportedException(); @@ -440,7 +463,29 @@ namespace ChocolArm64.Instruction { if (Sse41.IsSupported) { - return Sse41.Insert(Vector, Value, (byte)(Index << 4)); + //Note: The if/else if is necessary to enable the JIT to + //produce a single INSERTPS instruction instead of the + //jump table fallback. + if (Index == 0) + { + return Sse41.Insert(Vector, Value, 0x00); + } + else if (Index == 1) + { + return Sse41.Insert(Vector, Value, 0x10); + } + else if (Index == 2) + { + return Sse41.Insert(Vector, Value, 0x20); + } + else if (Index == 3) + { + return Sse41.Insert(Vector, Value, 0x30); + } + else + { + throw new ArgumentOutOfRangeException(nameof(Index)); + } } else if (Sse2.IsSupported) { @@ -461,6 +506,79 @@ namespace ChocolArm64.Instruction } [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> Sse41VectorInsertScalarSingle(float Value, Vector128<float> Vector) + { + //Note: 0b1110 is the mask to zero the upper bits. + return Sse41.Insert(Vector, Value, 0b1110); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<sbyte> VectorSByteZero() + { + if (Sse2.IsSupported) + { + return Sse2.SetZeroVector128<sbyte>(); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<short> VectorInt16Zero() + { + if (Sse2.IsSupported) + { + return Sse2.SetZeroVector128<short>(); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<int> VectorInt32Zero() + { + if (Sse2.IsSupported) + { + return Sse2.SetZeroVector128<int>(); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<long> VectorInt64Zero() + { + if (Sse2.IsSupported) + { + return Sse2.SetZeroVector128<long>(); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorSingleZero() + { + if (Sse.IsSupported) + { + return Sse.SetZeroVector128(); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<double> VectorDoubleZero() + { + if (Sse2.IsSupported) + { + return Sse2.SetZeroVector128<double>(); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorZero32_128(Vector128<float> Vector) { if (Sse.IsSupported) @@ -516,6 +634,50 @@ namespace ChocolArm64.Instruction } [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<byte> VectorSingleToByte(Vector128<float> Vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, byte>(Vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<ushort> VectorSingleToUInt16(Vector128<float> Vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, ushort>(Vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<uint> VectorSingleToUInt32(Vector128<float> Vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, uint>(Vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<ulong> VectorSingleToUInt64(Vector128<float> Vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<float, ulong>(Vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<double> VectorSingleToDouble(Vector128<float> Vector) { if (Sse.IsSupported) @@ -571,6 +733,50 @@ namespace ChocolArm64.Instruction } [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorByteToSingle(Vector128<byte> Vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<byte, float>(Vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorUInt16ToSingle(Vector128<ushort> Vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<ushort, float>(Vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorUInt32ToSingle(Vector128<uint> Vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<uint, float>(Vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector128<float> VectorUInt64ToSingle(Vector128<ulong> Vector) + { + if (Sse.IsSupported) + { + return Sse.StaticCast<ulong, float>(Vector); + } + + throw new PlatformNotSupportedException(); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector128<float> VectorDoubleToSingle(Vector128<double> Vector) { if (Sse.IsSupported) |
