diff options
| author | Wunk <wunkolo@gmail.com> | 2023-03-20 12:09:24 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-20 16:09:24 -0300 |
| commit | 17620d18db8d4a67e4b917596c760107d26fadc5 (patch) | |
| tree | 4a17ec5f209e64e4944b7deceec5dbdf1e0d9dc3 /ARMeilleure/CodeGen/X86/HardwareCapabilities.cs | |
| parent | 9f1cf6458c78a42256b1f390f5b3b9159b00a7cb (diff) | |
ARMeilleure: Add initial support for AVX512 (EVEX encoding) (cont) (#4147)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection
Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as
shorthands for `F+VL` and `F+VL+DQ`.
* ARMeilleure: Add initial support for EVEX instruction encoding
Does not implement rounding or exception controls.
* ARMeilleure: Add `X86Vpternlogd`
Accelerates the vector-`Not` instruction.
* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}
* ARMeilleure: Add check for `XCR0` flags
Add XCR0 register checks for AVX and AVX512F, following the guidelines
from sections 14.3 and 15.2 of the Intel Architecture Software
Developer's Manual.
* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting
* ARMeilleure: Move XCR0 procedure to GetXcr0Eax
* ARMeilleure: Add `XCR0` to `FeatureInfo` structure
* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly
Avoids an additional allocation
* ARMeilleure: Formatting fixes
* ARMeilleure: Fix EVEX encoding src2 register index
> Just like in VEX prefix, vvvv is provided in inverted form.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I`
Passes unit tests; instruction utilization has been verified.
* ARMeilleure: Fix EVEX register operand designations
Operand 2 was being sourced improperly.
EVEX encoded instructions source their operands like so:
Operand 1: ModRM:reg
Operand 2: EVEX.vvvv
Operand 3: ModRM:r/m
Operand 4: Imm
This fixes the improper register designations when emitting vpternlog.
Now the "dest", "src1", and "src2" arguments are emitted in the proper order in EVEX instructions.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V`
* ARMeilleure: PTC version bump
* ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail
* ARMeilleure: Update EVEX encoding comment capitalization
Diffstat (limited to 'ARMeilleure/CodeGen/X86/HardwareCapabilities.cs')
| -rw-r--r-- | ARMeilleure/CodeGen/X86/HardwareCapabilities.cs | 52 |
1 files changed, 50 insertions, 2 deletions
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs index c12a4e28..63a9e46a 100644 --- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs +++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs @@ -1,10 +1,14 @@ +using Ryujinx.Memory; using System; +using System.Runtime.InteropServices; using System.Runtime.Intrinsics.X86; namespace ARMeilleure.CodeGen.X86 { static class HardwareCapabilities { + private delegate uint GetXcr0(); + static HardwareCapabilities() { if (!X86Base.IsSupported) @@ -24,6 +28,28 @@ namespace ARMeilleure.CodeGen.X86 FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7; FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7; } + + Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax(); + } + + private static uint GetXcr0Eax() + { + ReadOnlySpan<byte> asmGetXcr0 = new byte[] + { + 0x31, 0xc9, // xor ecx, ecx + 0xf, 0x01, 0xd0, // xgetbv + 0xc3, // ret + }; + + using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length); + + memGetXcr0.Write(0, asmGetXcr0); + + memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute); + + var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer); + + return fGetXcr0(); } [Flags] @@ -44,6 +70,7 @@ namespace ARMeilleure.CodeGen.X86 Sse42 = 1 << 20, Popcnt = 1 << 23, Aes = 1 << 25, + Osxsave = 1 << 27, Avx = 1 << 28, F16c = 1 << 29 } @@ -52,7 +79,11 @@ namespace ARMeilleure.CodeGen.X86 public enum FeatureFlags7Ebx { Avx2 = 1 << 5, - Sha = 1 << 29 + Avx512f = 1 << 16, + Avx512dq = 1 << 17, + Sha = 1 << 29, + Avx512bw = 1 << 30, + Avx512vl = 1 << 31 } [Flags] @@ -61,10 +92,21 @@ namespace ARMeilleure.CodeGen.X86 Gfni = 1 << 8, } + [Flags] + public enum Xcr0FlagsEax + { + Sse = 1 << 1, + YmmHi128 = 1 << 2, + Opmask = 1 << 5, + ZmmHi256 = 1 << 6, + Hi16Zmm = 1 << 7 + } + public static FeatureFlags1Edx FeatureInfo1Edx { get; } public static FeatureFlags1Ecx FeatureInfo1Ecx { get; } public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0; 
public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0; + public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0; public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse); public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2); @@ -76,8 +118,13 @@ namespace ARMeilleure.CodeGen.X86 public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42); public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt); public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes); - public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx); + public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128); public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx; + public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Osxsave) + && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm); + public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F; + public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F; + public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F; public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c); public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha); public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni); @@ -85,5 +132,6 @@ namespace ARMeilleure.CodeGen.X86 public static bool ForceLegacySse { get; set; } public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse; + public static 
bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse; } }
\ No newline at end of file |
