aboutsummaryrefslogtreecommitdiff
path: root/ARMeilleure/CodeGen
diff options
context:
space:
mode:
authorriperiperi <rhy3756547@hotmail.com>2020-07-13 11:48:14 +0100
committerGitHub <noreply@github.com>2020-07-13 20:48:14 +1000
commitd7044b10a253dae31b9a0041a432e3a7adce59f6 (patch)
tree20947496224af14c43803de8169a6493aa77b45b /ARMeilleure/CodeGen
parent30d4f752f47217bcdc4dd05428010acf270189d0 (diff)
Add SSE4.2 Path for CRC32, add A32 variant, add tests for non-castagnoli variants. (#1328)
* Add CRC32 A32 instructions. * Fix CRC32 instructions. * Add CRC intrinsic and fast path. Loop is currently unrolled, will look into adding temp vars after tests are added. * Begin work on Crc tests * Fix SSE4.2 path for CRC32C, finialize tests. * Remove unused IR path. * Fix spacing between prefix checks. * This should be Src. * PTC Version * OpCodeTable Order * Integer check improvement. Value and Crc can be either 32 or 64 size. * This wasn't necessary... * If size is 3, value type must be I64. * Fix same src+dest handling for non crc intrinsics. * Pre-fix (ha) issue with vex encodings
Diffstat (limited to 'ARMeilleure/CodeGen')
-rw-r--r--ARMeilleure/CodeGen/X86/Assembler.cs33
-rw-r--r--ARMeilleure/CodeGen/X86/CodeGenerator.cs15
-rw-r--r--ARMeilleure/CodeGen/X86/IntrinsicTable.cs3
-rw-r--r--ARMeilleure/CodeGen/X86/IntrinsicType.cs1
-rw-r--r--ARMeilleure/CodeGen/X86/PreAllocator.cs11
-rw-r--r--ARMeilleure/CodeGen/X86/X86Instruction.cs3
6 files changed, 59 insertions, 7 deletions
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs
index 537c746c..d0ccd6f8 100644
--- a/ARMeilleure/CodeGen/X86/Assembler.cs
+++ b/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -28,10 +28,10 @@ namespace ARMeilleure.CodeGen.X86
Vex = 1 << 4,
PrefixBit = 16,
- PrefixMask = 3 << PrefixBit,
+ PrefixMask = 7 << PrefixBit,
Prefix66 = 1 << PrefixBit,
PrefixF3 = 2 << PrefixBit,
- PrefixF2 = 3 << PrefixBit
+ PrefixF2 = 4 << PrefixBit
}
private struct InstructionInfo
@@ -104,6 +104,9 @@ namespace ARMeilleure.CodeGen.X86
Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
Add(X86Instruction.Cpuid, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fa2, InstructionFlags.RegOnly));
+ Add(X86Instruction.Crc32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2));
+ Add(X86Instruction.Crc32_16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66));
+ Add(X86Instruction.Crc32_8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src));
Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
@@ -1172,7 +1175,15 @@ namespace ARMeilleure.CodeGen.X86
if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
{
- int vexByte2 = (int)(flags & InstructionFlags.PrefixMask) >> (int)InstructionFlags.PrefixBit;
+ // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits.
+
+ int vexByte2 = (flags & InstructionFlags.PrefixMask) switch
+ {
+ InstructionFlags.Prefix66 => 1,
+ InstructionFlags.PrefixF3 => 2,
+ InstructionFlags.PrefixF2 => 3,
+ _ => 0
+ };
if (src1 != null)
{
@@ -1220,11 +1231,19 @@ namespace ARMeilleure.CodeGen.X86
}
else
{
- switch (flags & InstructionFlags.PrefixMask)
+ if (flags.HasFlag(InstructionFlags.Prefix66))
+ {
+ WriteByte(0x66);
+ }
+
+ if (flags.HasFlag(InstructionFlags.PrefixF2))
+ {
+ WriteByte(0xf2);
+ }
+
+ if (flags.HasFlag(InstructionFlags.PrefixF3))
{
- case InstructionFlags.Prefix66: WriteByte(0x66); break;
- case InstructionFlags.PrefixF2: WriteByte(0xf2); break;
- case InstructionFlags.PrefixF3: WriteByte(0xf3); break;
+ WriteByte(0xf3);
}
if (rexPrefix != 0)
diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
index e7e7553e..e217a665 100644
--- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs
+++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -333,6 +333,21 @@ namespace ARMeilleure.CodeGen.X86
break;
}
+ case IntrinsicType.Crc32:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src1.Type.IsInteger() && src2.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src2, dest.Type);
+
+ break;
+ }
+
case IntrinsicType.BinaryImm:
{
Operand dest = operation.Destination;
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
index bc07c6b0..f7469bad 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -38,6 +38,9 @@ namespace ARMeilleure.CodeGen.X86
Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Crc32, new IntrinsicInfo(X86Instruction.Crc32, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Crc32_16, new IntrinsicInfo(X86Instruction.Crc32_16, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Crc32_8, new IntrinsicInfo(X86Instruction.Crc32_8, IntrinsicType.Crc32));
Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary));
Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary));
Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary));
diff --git a/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
index 41c52b59..fe0f66ed 100644
--- a/ARMeilleure/CodeGen/X86/IntrinsicType.cs
+++ b/ARMeilleure/CodeGen/X86/IntrinsicType.cs
@@ -9,6 +9,7 @@ namespace ARMeilleure.CodeGen.X86
Binary,
BinaryGpr,
BinaryImm,
+ Crc32,
Ternary,
TernaryImm
}
diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs
index d1794b55..dc7f3a75 100644
--- a/ARMeilleure/CodeGen/X86/PreAllocator.cs
+++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -1294,11 +1294,22 @@ namespace ARMeilleure.CodeGen.X86
case Instruction.VectorInsert16:
case Instruction.VectorInsert8:
return !HardwareCapabilities.SupportsVexEncoding;
+
+ case Instruction.Extended:
+ return IsIntrinsicSameOperandDestSrc1(operation);
}
return IsVexSameOperandDestSrc1(operation);
}
+ private static bool IsIntrinsicSameOperandDestSrc1(Operation operation)
+ {
+ IntrinsicOperation intrinOp = (IntrinsicOperation)operation;
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrinOp.Intrinsic);
+
+ return info.Type == IntrinsicType.Crc32 || IsVexSameOperandDestSrc1(operation);
+ }
+
private static bool IsVexSameOperandDestSrc1(Operation operation)
{
if (IsIntrinsic(operation.Instruction))
diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs
index c3dffc62..9ac17e5b 100644
--- a/ARMeilleure/CodeGen/X86/X86Instruction.cs
+++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -33,6 +33,9 @@ namespace ARMeilleure.CodeGen.X86
Comisd,
Comiss,
Cpuid,
+ Crc32,
+ Crc32_16,
+ Crc32_8,
Cvtdq2pd,
Cvtdq2ps,
Cvtpd2dq,