diff options
| author | Wunk <wunkolo@gmail.com> | 2022-12-18 11:46:13 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-12-18 16:46:13 -0300 |
| commit | 295fbd0542a93ac50e558054a3f0c8c64286b764 (patch) | |
| tree | 0c5f631e2a53f10e42083f1623f5ff26926ef27d /ARMeilleure/CodeGen/X86/Assembler.cs | |
| parent | d7310d7a1c3788ff931a5f46677386a07ceeaf62 (diff) | |
ARMeilleure: Add initial support for AVX512(EVEX encoding) (#3663)
* ARMeilleure: Add AVX512{F,VL,DQ,BW} detection
Add `UseAvx512Ortho` and `UseAvx512OrthoFloat` optimization flags as
short-hands for `F+VL` and `F+VL+DQ`.
* ARMeilleure: Add initial support for EVEX instruction encoding
Does not implement rounding, or exception controls.
* ARMeilleure: Add `X86Vpternlogd`
Accelerates the vector-`Not` instruction.
* ARMeilleure: Add check for `OSXSAVE` for AVX{2,512}
* ARMeilleure: Add check for `XCR0` flags
Add XCR0 register checks for AVX and AVX512F, following the guidelines
from section 14.3 and 15.2 from the Intel Architecture Software
Developer's Manual.
* ARMeilleure: Increment InternalVersion
* ARMeilleure: Remove redundant `ReProtect` and `Dispose`, formatting
* ARMeilleure: Move XCR0 procedure to GetXcr0Eax
* ARMeilleure: Add `XCR0` to `FeatureInfo` structure
* ARMeilleure: Utilize `ReadOnlySpan` for Xcr0 assembly
Avoids an additional allocation
* ARMeilleure: Formatting fixes
Diffstat (limited to 'ARMeilleure/CodeGen/X86/Assembler.cs')
| -rw-r--r-- | ARMeilleure/CodeGen/X86/Assembler.cs | 105 |
1 files changed, 104 insertions, 1 deletions
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index c15deadc..123c80fb 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -1033,7 +1033,13 @@ namespace ARMeilleure.CodeGen.X86 Debug.Assert(opCode != BadOp, "Invalid opcode value."); - if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) + if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding) + { + WriteEvexInst(dest, src1, src2, type, flags, opCode); + + opCode &= 0xff; + } + else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) { // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits. @@ -1152,6 +1158,103 @@ namespace ARMeilleure.CodeGen.X86 } } + private void WriteEvexInst( + Operand dest, + Operand src1, + Operand src2, + OperandType type, + InstructionFlags flags, + int opCode, + bool broadcast = false, + int registerWidth = 128, + int maskRegisterIdx = 0, + bool zeroElements = false) + { + int destIdx = dest.GetRegister().Index; + int src1Idx = src1.GetRegister().Index; + int src2Idx = src2.GetRegister().Index; + + WriteByte(0x62); + + // P0 + // Extend dest register + bool r = (destIdx & 8) == 0; + // Extend src register + bool x = (src1Idx & 16) == 0; + // Extend src register + bool b = (src1Idx & 8) == 0; + // Extend dest register + bool rp = (destIdx & 16) == 0; + // Escape code index + byte mm = 0b00; + + switch ((ushort)(opCode >> 8)) + { + case 0xf00: mm = 0b01; break; + case 0xf38: mm = 0b10; break; + case 0xf3a: mm = 0b11; break; + + default: Debug.Assert(false, $"Failed to EVEX encode opcode 0x{opCode:X}."); break; + } + + WriteByte( + (byte)( + (r ? 0x80 : 0) | + (x ? 0x40 : 0) | + (b ? 0x20 : 0) | + (rp ? 0x10 : 0) | + mm)); + + // P1 + // Specify 64-bit lane mode + bool w = Is64Bits(type); + // Src2 register index + byte vvvv = (byte)(src2Idx & 0b1111); + // Opcode prefix + byte pp = (flags & InstructionFlags.PrefixMask) switch + { + InstructionFlags.Prefix66 => 0b01, + InstructionFlags.PrefixF3 => 0b10, + InstructionFlags.PrefixF2 => 0b11, + _ => 0 + }; + WriteByte( + (byte)( + (w ? 0x80 : 0) | + (vvvv << 3) | + 0b100 | + pp)); + + // P2 + // Mask register determines what elements to zero, rather than what elements to merge + bool z = zeroElements; + // Specifies register-width + byte ll = 0b00; + switch (registerWidth) + { + case 128: ll = 0b00; break; + case 256: ll = 0b01; break; + case 512: ll = 0b10; break; + + default: Debug.Assert(false, $"Invalid EVEX vector register width {registerWidth}."); break; + } + // Embedded broadcast in the case of a memory operand + bool bcast = broadcast; + // Extend src2 register + bool vp = (src2Idx & 16) == 0; + // Mask register index + Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}."); + byte aaa = (byte)(maskRegisterIdx & 0b111); + + WriteByte( + (byte)( + (z ? 0x80 : 0) | + (ll << 5) | + (bcast ? 0x10 : 0) | + (vp ? 8 : 0) | + aaa)); + } + private void WriteCompactInst(Operand operand, int opCode) { int regIndex = operand.GetRegister().Index; |
