From 17620d18db8d4a67e4b917596c760107d26fadc5 Mon Sep 17 00:00:00 2001 From: Wunk Date: Mon, 20 Mar 2023 12:09:24 -0700 Subject: ARMeilleure: Add initial support for AVX512 (EVEX encoding) (cont) (#4147) * ARMeilleure: Add AVX512{F,VL,DQ,BW} detection Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as short-hands for `F+VL` and `F+VL+DQ`. * ARMeilleure: Add initial support for EVEX instruction encoding Does not implement rounding, or exception controls. * ARMeilleure: Add `X86Vpternlogd` Accelerates the vector-`Not` instruction. * ARMeilleure: Add check for `OSXSAVE` for AVX{2,512} * ARMeilleure: Add check for `XCR0` flags Add XCR0 register checks for AVX and AVX512F, following the guidelines from section 14.3 and 15.2 from the Intel Architecture Software Developer's Manual. * ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting * ARMeilleure: Move XCR0 procedure to GetXcr0Eax * ARMeilleure: Add `XCR0` to `FeatureInfo` structure * ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly Avoids an additional allocation * ARMeilleure: Formatting fixes * ARMeilleure: Fix EVEX encoding src2 register index > Just like in VEX prefix, vvvv is provided in inverted form. * ARMeilleure: Add `X86Vpternlogd` acceleration to `Vmvn_I` Passes unit tests, verified instruction utilization * ARMeilleure: Fix EVEX register operand designations Operand 2 was being sourced improperly. EVEX encoded instructions source their operands like so: Operand 1: ModRM:reg Operand 2: EVEX.vvvvv Operand 3: ModRM:r/m Operand 4: Imm This fixes the improper register designations when emitting vpternlog. Now "dest", "src1", "src2" arguments emit in the proper order in EVEX instructions. * ARMeilleure: Add `X86Vpternlogd` acceleration to `Orn_V` * ARMeilleure: PTC version bump * ARMeilleure: Update EVEX encoding Debug.Assert to Debug.Fail * ARMeilleure: Update EVEX encoding comment capitalization --- ARMeilleure/Translation/PTC/Ptc.cs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'ARMeilleure/Translation') diff --git a/ARMeilleure/Translation/PTC/Ptc.cs b/ARMeilleure/Translation/PTC/Ptc.cs index 0b23fd04..17f68706 100644 --- a/ARMeilleure/Translation/PTC/Ptc.cs +++ b/ARMeilleure/Translation/PTC/Ptc.cs @@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC private const string OuterHeaderMagicString = "PTCohd\0\0"; private const string InnerHeaderMagicString = "PTCihd\0\0"; - private const uint InternalVersion = 4484; //! To be incremented manually for each change to the ARMeilleure project. + private const uint InternalVersion = 4485; //! To be incremented manually for each change to the ARMeilleure project. private const string ActualDir = "0"; private const string BackupDir = "1"; @@ -969,6 +969,7 @@ namespace ARMeilleure.Translation.PTC (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap, (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2, (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo, + 0, 0); } else if (RuntimeInformation.ProcessArchitecture == Architecture.X64) @@ -977,11 +978,12 @@ namespace ARMeilleure.Translation.PTC (ulong)X86HardwareCapabilities.FeatureInfo1Ecx, (ulong)X86HardwareCapabilities.FeatureInfo1Edx, (ulong)X86HardwareCapabilities.FeatureInfo7Ebx, - (ulong)X86HardwareCapabilities.FeatureInfo7Ecx); + (ulong)X86HardwareCapabilities.FeatureInfo7Ecx, + (ulong)X86HardwareCapabilities.Xcr0InfoEax); } else { - return new FeatureInfo(0, 0, 0, 0); + return new FeatureInfo(0, 0, 0, 0, 0); } } @@ -1002,7 +1004,7 @@ namespace ARMeilleure.Translation.PTC return osPlatform; } - [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 78*/)] + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 86*/)] private struct OuterHeader { public ulong Magic; @@ -1034,8 +1036,8 @@ namespace ARMeilleure.Translation.PTC } } - [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 32*/)] - private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3); + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 40*/)] + private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3, ulong FeatureInfo4); [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)] private struct InnerHeader -- cgit v1.2.3