diff options
| author | riperiperi <rhy3756547@hotmail.com> | 2020-07-13 11:48:14 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-07-13 20:48:14 +1000 |
| commit | d7044b10a253dae31b9a0041a432e3a7adce59f6 (patch) | |
| tree | 20947496224af14c43803de8169a6493aa77b45b /ARMeilleure/CodeGen | |
| parent | 30d4f752f47217bcdc4dd05428010acf270189d0 (diff) | |
Add SSE4.2 Path for CRC32, add A32 variant, add tests for non-castagnoli variants. (#1328)
* Add CRC32 A32 instructions.
* Fix CRC32 instructions.
* Add CRC intrinsic and fast path.
Loop is currently unrolled, will look into adding temp vars after tests are added.
* Begin work on Crc tests
* Fix SSE4.2 path for CRC32C, finialize tests.
* Remove unused IR path.
* Fix spacing between prefix checks.
* This should be Src.
* PTC Version
* OpCodeTable Order
* Integer check improvement. Value and Crc can be either 32 or 64 size.
* This wasn't necessary...
* If size is 3, value type must be I64.
* Fix same src+dest handling for non crc intrinsics.
* Pre-fix (ha) issue with vex encodings
Diffstat (limited to 'ARMeilleure/CodeGen')
| -rw-r--r-- | ARMeilleure/CodeGen/X86/Assembler.cs | 33 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/CodeGenerator.cs | 15 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/IntrinsicTable.cs | 3 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/IntrinsicType.cs | 1 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/PreAllocator.cs | 11 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/X86Instruction.cs | 3 |
6 files changed, 59 insertions, 7 deletions
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index 537c746c..d0ccd6f8 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -28,10 +28,10 @@ namespace ARMeilleure.CodeGen.X86 Vex = 1 << 4, PrefixBit = 16, - PrefixMask = 3 << PrefixBit, + PrefixMask = 7 << PrefixBit, Prefix66 = 1 << PrefixBit, PrefixF3 = 2 << PrefixBit, - PrefixF2 = 3 << PrefixBit + PrefixF2 = 4 << PrefixBit } private struct InstructionInfo @@ -104,6 +104,9 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex)); Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly)); + Add(X86Instruction.Crc32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2)); + Add(X86Instruction.Crc32_16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66)); + Add(X86Instruction.Crc32_8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src)); Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3)); Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex)); Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2)); @@ -1172,7 +1175,15 @@ namespace ARMeilleure.CodeGen.X86 if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) { - int vexByte2 = (int)(flags & InstructionFlags.PrefixMask) >> (int)InstructionFlags.PrefixBit; + // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits. + + int vexByte2 = (flags & InstructionFlags.PrefixMask) switch + { + InstructionFlags.Prefix66 => 1, + InstructionFlags.PrefixF3 => 2, + InstructionFlags.PrefixF2 => 3, + _ => 0 + }; if (src1 != null) { @@ -1220,11 +1231,19 @@ namespace ARMeilleure.CodeGen.X86 } else { - switch (flags & InstructionFlags.PrefixMask) + if (flags.HasFlag(InstructionFlags.Prefix66)) + { + WriteByte(0x66); + } + + if (flags.HasFlag(InstructionFlags.PrefixF2)) + { + WriteByte(0xf2); + } + + if (flags.HasFlag(InstructionFlags.PrefixF3)) { - case InstructionFlags.Prefix66: WriteByte(0x66); break; - case InstructionFlags.PrefixF2: WriteByte(0xf2); break; - case InstructionFlags.PrefixF3: WriteByte(0xf3); break; + WriteByte(0xf3); } if (rexPrefix != 0) diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index e7e7553e..e217a665 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -333,6 +333,21 @@ namespace ARMeilleure.CodeGen.X86 break; } + case IntrinsicType.Crc32: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameReg(dest, src1); + + Debug.Assert(dest.Type.IsInteger() && src1.Type.IsInteger() && src2.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, src2, dest.Type); + + break; + } + case IntrinsicType.BinaryImm: { Operand dest = operation.Destination; diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index bc07c6b0..f7469bad 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -38,6 +38,9 @@ namespace ARMeilleure.CodeGen.X86 Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Crc32, new IntrinsicInfo(X86Instruction.Crc32, IntrinsicType.Crc32)); + Add(Intrinsic.X86Crc32_16, new IntrinsicInfo(X86Instruction.Crc32_16, IntrinsicType.Crc32)); + Add(Intrinsic.X86Crc32_8, new IntrinsicInfo(X86Instruction.Crc32_8, IntrinsicType.Crc32)); Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary)); Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary)); Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary)); diff --git a/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/ARMeilleure/CodeGen/X86/IntrinsicType.cs index 41c52b59..fe0f66ed 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicType.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicType.cs @@ -9,6 +9,7 @@ namespace ARMeilleure.CodeGen.X86 Binary, BinaryGpr, BinaryImm, + Crc32, Ternary, TernaryImm } diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index d1794b55..dc7f3a75 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -1294,11 +1294,22 @@ namespace ARMeilleure.CodeGen.X86 case Instruction.VectorInsert16: case Instruction.VectorInsert8: return !HardwareCapabilities.SupportsVexEncoding; + + case Instruction.Extended: + return IsIntrinsicSameOperandDestSrc1(operation); } return IsVexSameOperandDestSrc1(operation); } + private static bool IsIntrinsicSameOperandDestSrc1(Operation operation) + { + IntrinsicOperation intrinOp = (IntrinsicOperation)operation; + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinOp.Intrinsic); + + return info.Type == IntrinsicType.Crc32 || IsVexSameOperandDestSrc1(operation); + } + private static bool IsVexSameOperandDestSrc1(Operation operation) { if (IsIntrinsic(operation.Instruction)) diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index c3dffc62..9ac17e5b 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -33,6 +33,9 @@ namespace ARMeilleure.CodeGen.X86 Comisd, Comiss, Cpuid, + Crc32, + Crc32_16, + Crc32_8, Cvtdq2pd, Cvtdq2ps, Cvtpd2dq, |
