diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2020-11-18 19:35:54 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-11-18 19:35:54 +0100 |
| commit | 0679084f115b6838dec4d8c5e85044c33d4122d0 (patch) | |
| tree | 0d25ace42740e37d6bb2a8cd30fa92c5313d265a /ARMeilleure/CodeGen | |
| parent | eafee34feebd432151809df402f3f696e4d93d08 (diff) | |
CPU (A64): Add FP16/FP32 fast paths (F16C Intrinsics) for Fcvt_S, Fcvtl_V & Fcvtn_V Instructions. Now HardwareCapabilities uses CpuId. (#1650)
* net5.0
* CPU (A64): Add FP16/FP32 fast paths (F16C Intrinsics) for Fcvt_S, Fcvtl_V & Fcvtn_V Instructions. Switch to .NET 5.0.
Nits.
Tests performed successfully in both debug and release mode (for all instructions involved).
* Address comment.
* Update appveyor.yml
* Revert "Update appveyor.yml"
This reverts commit 27cdd59e8b90e227e6924d9c162af26c00a89013.
* Remove Assembler CpuId.
* Update appveyor.yml
* Address comment.
Diffstat (limited to 'ARMeilleure/CodeGen')
| -rw-r--r-- | ARMeilleure/CodeGen/X86/Assembler.cs | 8 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/HardwareCapabilities.cs | 62 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/IntrinsicTable.cs | 2 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/X86Instruction.cs | 3 |
4 files changed, 57 insertions, 18 deletions
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index b855f1b1..48053efc 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -104,7 +104,6 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Cmpxchg8, new InstructionInfo(0x00000fb0, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Reg8Src)); Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex)); - Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly)); Add(X86Instruction.Crc32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2)); Add(X86Instruction.Crc32_16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66)); Add(X86Instruction.Crc32_8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src)); @@ -270,6 +269,8 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex)); Add(X86Instruction.Vblendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); @@ -386,11 +387,6 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(src1, null, src2, X86Instruction.Comiss); } - public void Cpuid() - { - WriteInstruction(null, null, OperandType.None, X86Instruction.Cpuid); - } - public void Cvtsd2ss(Operand dest, Operand src1, Operand src2) { WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss); diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs index b622c65c..aa103e30 100644 --- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs +++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs @@ -1,20 +1,60 @@ +using System; using System.Runtime.Intrinsics.X86; namespace ARMeilleure.CodeGen.X86 { static class HardwareCapabilities { - public static bool SupportsSse => Sse.IsSupported; - public static bool SupportsSse2 => Sse2.IsSupported; - public static bool SupportsSse3 => Sse3.IsSupported; - public static bool SupportsSsse3 => Ssse3.IsSupported; - public static bool SupportsSse41 => Sse41.IsSupported; - public static bool SupportsSse42 => Sse42.IsSupported; - public static bool SupportsPclmulqdq => Pclmulqdq.IsSupported; - public static bool SupportsFma => Fma.IsSupported; - public static bool SupportsPopcnt => Popcnt.IsSupported; - public static bool SupportsAesni => Aes.IsSupported; - public static bool SupportsAvx => Avx.IsSupported; + static HardwareCapabilities() + { + if (!X86Base.IsSupported) + { + return; + } + + (_, _, int ecx, int edx) = X86Base.CpuId(0x00000001, 0x00000000); + + FeatureInfoEdx = (FeatureFlagsEdx)edx; + FeatureInfoEcx = (FeatureFlagsEcx)ecx; + } + + [Flags] + public enum FeatureFlagsEdx + { + Sse = 1 << 25, + Sse2 = 1 << 26 + } + + [Flags] + public enum FeatureFlagsEcx + { + Sse3 = 1 << 0, + Pclmulqdq = 1 << 1, + Ssse3 = 1 << 9, + Fma = 1 << 12, + Sse41 = 1 << 19, + Sse42 = 1 << 20, + Popcnt = 1 << 23, + Aes = 1 << 25, + Avx = 1 << 28, + F16c = 1 << 29 + } + + public static FeatureFlagsEdx FeatureInfoEdx { get; } + public static FeatureFlagsEcx FeatureInfoEcx { get; } + + public static bool SupportsSse => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse); + public static bool SupportsSse2 => FeatureInfoEdx.HasFlag(FeatureFlagsEdx.Sse2); + public static bool SupportsSse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse3); + public static bool SupportsPclmulqdq => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Pclmulqdq); + public static bool SupportsSsse3 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Ssse3); + public static bool SupportsFma => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Fma); + public static bool SupportsSse41 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse41); + public static bool SupportsSse42 => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Sse42); + public static bool SupportsPopcnt => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Popcnt); + public static bool SupportsAesni => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Aes); + public static bool SupportsAvx => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.Avx); + public static bool SupportsF16c => FeatureInfoEcx.HasFlag(FeatureFlagsEcx.F16c); public static bool ForceLegacySse { get; set; } diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index f7469bad..864b0a10 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -162,6 +162,8 @@ namespace ARMeilleure.CodeGen.X86 Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary)); Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary)); Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary)); + Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary)); + Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm)); Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary)); } diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index f9b35d37..fae17b86 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -33,7 +33,6 @@ namespace ARMeilleure.CodeGen.X86 Cmpxchg8, Comisd, Comiss, - Cpuid, Crc32, Crc32_16, Crc32_8, @@ -199,6 +198,8 @@ namespace ARMeilleure.CodeGen.X86 Unpcklps, Vblendvpd, Vblendvps, + Vcvtph2ps, + Vcvtps2ph, Vpblendvb, Xor, Xorpd, |
