aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
diff options
context:
space:
mode:
authorWunk <wunkolo@gmail.com>2023-03-20 12:09:24 -0700
committerGitHub <noreply@github.com>2023-03-20 16:09:24 -0300
commit17620d18db8d4a67e4b917596c760107d26fadc5 (patch)
tree4a17ec5f209e64e4944b7deceec5dbdf1e0d9dc3 /ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
parent9f1cf6458c78a42256b1f390f5b3b9159b00a7cb (diff)
ARMeilleure: Add initial support for AVX512 (EVEX encoding) (cont) (#4147)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection
  Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as shorthands for `F+VL` and `F+VL+DQ`.
* ARMeilleure: Add initial support for EVEX instruction encoding
  Does not implement rounding or exception controls.
* ARMeilleure: Add `X86Vpternlogd`
  Accelerates the vector-`Not` instruction.
* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}
* ARMeilleure: Add check for `XCR0` flags
  Add XCR0 register checks for AVX and AVX512F, following the guidelines from sections 14.3 and 15.2 of the Intel Architecture Software Developer's Manual.
* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting
* ARMeilleure: Move XCR0 procedure to GetXcr0Eax
* ARMeilleure: Add `XCR0` to `FeatureInfo` structure
* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly
  Avoids an additional allocation.
* ARMeilleure: Formatting fixes
* ARMeilleure: Fix EVEX encoding src2 register index
  > Just like in VEX prefix, vvvv is provided in inverted form.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I`
  Passes unit tests, verified instruction utilization.
* ARMeilleure: Fix EVEX register operand designations
  Operand 2 was being sourced improperly. EVEX encoded instructions source their operands like so:
    Operand 1: ModRM:reg
    Operand 2: EVEX.vvvvv
    Operand 3: ModRM:r/m
    Operand 4: Imm
  This fixes the improper register designations when emitting vpternlog. Now "dest", "src1", "src2" arguments emit in the proper order in EVEX instructions.
* ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V`
* ARMeilleure: PTC version bump
* ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail
* ARMeilleure: Update EVEX encoding comment capitalization
Diffstat (limited to 'ARMeilleure/CodeGen/X86/HardwareCapabilities.cs')
-rw-r--r--ARMeilleure/CodeGen/X86/HardwareCapabilities.cs52
1 files changed, 50 insertions, 2 deletions
diff --git a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
index c12a4e28..63a9e46a 100644
--- a/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
+++ b/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -1,10 +1,14 @@
+using Ryujinx.Memory;
using System;
+using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
namespace ARMeilleure.CodeGen.X86
{
static class HardwareCapabilities
{
+ private delegate uint GetXcr0(); // Managed signature of the native XGETBV stub that GetXcr0Eax builds and invokes.
+
static HardwareCapabilities()
{
if (!X86Base.IsSupported)
@@ -24,6 +28,28 @@ namespace ARMeilleure.CodeGen.X86
FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
}
+
+ Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax();
+ }
+
+        /// <summary>
+        /// Reads the low 32 bits of the XCR0 extended control register by copying a small
+        /// native XGETBV stub into executable memory and invoking it.
+        /// </summary>
+        /// <returns>
+        /// The value XGETBV leaves in EAX for ECX = 0, or 0 when X86 intrinsics are unavailable
+        /// or CPUID does not report OSXSAVE (in which case XGETBV must not be executed).
+        /// </returns>
+        private static uint GetXcr0Eax()
+        {
+            // The stub below is raw x86 machine code; never run it on another architecture.
+            if (!X86Base.IsSupported)
+            {
+                return 0;
+            }
+
+            // XGETBV raises #UD unless the OS has enabled XSAVE (CPUID.01H:ECX.OSXSAVE, bit 27).
+            // SupportsAvx and SupportsAvx512F also require OSXSAVE, so reporting an empty XCR0
+            // here yields the same capability results without risking a crash.
+            (_, _, int ecx1, _) = X86Base.CpuId(0x00000001, 0x00000000);
+
+            if (!((FeatureFlags1Ecx)ecx1).HasFlag(FeatureFlags1Ecx.Osxsave))
+            {
+                return 0;
+            }
+
+            ReadOnlySpan<byte> asmGetXcr0 = new byte[]
+            {
+                0x31, 0xc9,       // xor ecx, ecx  (select XCR0)
+                0x0f, 0x01, 0xd0, // xgetbv        (result in edx:eax)
+                0xc3,             // ret           (eax is the return value)
+            };
+
+            // The block is disposed when this method returns, i.e. after the stub has run.
+            using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length);
+
+            memGetXcr0.Write(0, asmGetXcr0);
+
+            // Switch the freshly written bytes from writable to executable before calling into them.
+            memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);
+
+            var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);
+
+            return fGetXcr0();
+        }
[Flags]
@@ -44,6 +70,7 @@ namespace ARMeilleure.CodeGen.X86
Sse42 = 1 << 20,
Popcnt = 1 << 23,
Aes = 1 << 25,
+ Osxsave = 1 << 27,
Avx = 1 << 28,
F16c = 1 << 29
}
@@ -52,7 +79,11 @@ namespace ARMeilleure.CodeGen.X86
public enum FeatureFlags7Ebx
{
Avx2 = 1 << 5,
- Sha = 1 << 29
+ Avx512f = 1 << 16,
+ Avx512dq = 1 << 17,
+ Sha = 1 << 29,
+ Avx512bw = 1 << 30,
+ Avx512vl = 1 << 31
}
[Flags]
@@ -61,10 +92,21 @@ namespace ARMeilleure.CodeGen.X86
Gfni = 1 << 8,
}
+        /// <summary>
+        /// State-component bits from the low 32 bits (EAX) of XCR0 that are consulted when
+        /// deciding whether the OS has enabled AVX and AVX-512 register state.
+        /// </summary>
+        [Flags]
+        public enum Xcr0FlagsEax
+        {
+            /// <summary>Bit 1: SSE (XMM) state.</summary>
+            Sse = 0x02,
+
+            /// <summary>Bit 2: upper halves of the YMM registers (AVX state).</summary>
+            YmmHi128 = 0x04,
+
+            /// <summary>Bit 5: AVX-512 opmask registers.</summary>
+            Opmask = 0x20,
+
+            /// <summary>Bit 6: upper 256 bits of ZMM0-ZMM15.</summary>
+            ZmmHi256 = 0x40,
+
+            /// <summary>Bit 7: ZMM16-ZMM31.</summary>
+            Hi16Zmm = 0x80
+        }
+
public static FeatureFlags1Edx FeatureInfo1Edx { get; }
public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0;
+ public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0;
public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
@@ -76,8 +118,13 @@ namespace ARMeilleure.CodeGen.X86
public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
- public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx);
+ public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128);
public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
+ public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Osxsave)
+ && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm);
+ public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F;
+ public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F;
+ public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F;
public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni);
@@ -85,5 +132,6 @@ namespace ARMeilleure.CodeGen.X86
public static bool ForceLegacySse { get; set; }
public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
+ public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse;
}
} \ No newline at end of file