diff options
| author | LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> | 2020-12-07 10:37:07 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-12-07 10:37:07 +0100 |
| commit | 567ea726e173040ae931a37bc85fd6cd92b69363 (patch) | |
| tree | 5b6487d4821c978659732d5f34abf5aa69b0dafa /ARMeilleure/CodeGen | |
| parent | 668720b0883106fc1f44da70dddb8a3502ac7dbb (diff) | |
Add support for guest Fz (Fpcr) mode through host Ftz and Daz (Mxcsr) modes (fast paths). (#1630)
* Add support for guest Fz (Fpcr) mode through host Ftz and Daz (Mxcsr) modes (fast paths).
* Ptc.InternalVersion = 1630
* Nits.
* Address comments.
* Update Ptc.cs
* Address comment.
Diffstat (limited to 'ARMeilleure/CodeGen')
| -rw-r--r-- | ARMeilleure/CodeGen/X86/Assembler.cs | 14 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/CodeGenerator.cs | 34 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/IntrinsicTable.cs | 2 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/IntrinsicType.cs | 1 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/PreAllocator.cs | 10 | ||||
| -rw-r--r-- | ARMeilleure/CodeGen/X86/X86Instruction.cs | 3 |
6 files changed, 63 insertions, 1 deletion
diff --git a/ARMeilleure/CodeGen/X86/Assembler.cs b/ARMeilleure/CodeGen/X86/Assembler.cs index d65262ea..b9751059 100644 --- a/ARMeilleure/CodeGen/X86/Assembler.cs +++ b/ARMeilleure/CodeGen/X86/Assembler.cs @@ -72,7 +72,7 @@ namespace ARMeilleure.CodeGen.X86 { _instTable = new InstructionInfo[(int)X86Instruction.Count]; - // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags + // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None)); Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex)); @@ -131,6 +131,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None)); Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Ldmxcsr, new InstructionInfo(0x02000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex)); Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None)); Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex)); @@ -257,6 +258,7 @@ namespace ARMeilleure.CodeGen.X86 Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex)); Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2)); 
Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Stmxcsr, new InstructionInfo(0x03000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex)); Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None)); Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66)); Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex)); @@ -526,6 +528,11 @@ namespace ARMeilleure.CodeGen.X86 WriteInstruction(dest, null, OperandType.None, X86Instruction.Jmp); } + public void Ldmxcsr(Operand dest) + { + WriteInstruction(dest, null, OperandType.I32, X86Instruction.Ldmxcsr); + } + public void Lea(Operand dest, Operand source, OperandType type) { WriteInstruction(dest, source, type, X86Instruction.Lea); @@ -796,6 +803,11 @@ namespace ARMeilleure.CodeGen.X86 WriteOpCode(dest, null, null, OperandType.None, info.Flags, info.OpRRM | (int)condition); } + public void Stmxcsr(Operand dest) + { + WriteInstruction(dest, null, OperandType.I32, X86Instruction.Stmxcsr); + } + public void Sub(Operand dest, Operand source, OperandType type) { WriteInstruction(dest, source, type, X86Instruction.Sub); diff --git a/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/ARMeilleure/CodeGen/X86/CodeGenerator.cs index 83ff136a..29a4cd78 100644 --- a/ARMeilleure/CodeGen/X86/CodeGenerator.cs +++ b/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -250,6 +250,40 @@ namespace ARMeilleure.CodeGen.X86 break; } + case IntrinsicType.Mxcsr: + { + Operand offset = operation.GetSource(0); + Operand bits = operation.GetSource(1); + + Debug.Assert(offset.Kind == OperandKind.Constant && bits.Kind == OperandKind.Constant); + Debug.Assert(offset.Type == OperandType.I32 && bits.Type == OperandType.I32); + + int offs = offset.AsInt32() + 
context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + + MemoryOperand memOp = MemoryOp(OperandType.I32, rsp, null, Multiplier.x1, offs); + + Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding); + + context.Assembler.Stmxcsr(memOp); + + if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrmb) + { + context.Assembler.Or(memOp, bits, OperandType.I32); + } + else /* if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrub) */ + { + Operand notBits = Const(~bits.AsInt32()); + + context.Assembler.And(memOp, notBits, OperandType.I32); + } + + context.Assembler.Ldmxcsr(memOp); + + break; + } + case IntrinsicType.PopCount: { Operand dest = operation.Destination; diff --git a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs index 310fdc51..cc0c797b 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicTable.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -76,6 +76,8 @@ namespace ARMeilleure.CodeGen.X86 Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary)); Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary)); Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary)); + Add(Intrinsic.X86Mxcsrmb, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Mask bits. + Add(Intrinsic.X86Mxcsrub, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); // Unmask bits. 
Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary)); Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary)); Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary)); diff --git a/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/ARMeilleure/CodeGen/X86/IntrinsicType.cs index 8248b048..5a9c14af 100644 --- a/ARMeilleure/CodeGen/X86/IntrinsicType.cs +++ b/ARMeilleure/CodeGen/X86/IntrinsicType.cs @@ -3,6 +3,7 @@ namespace ARMeilleure.CodeGen.X86 enum IntrinsicType { Comis_, + Mxcsr, PopCount, Unary, UnaryToGpr, diff --git a/ARMeilleure/CodeGen/X86/PreAllocator.cs b/ARMeilleure/CodeGen/X86/PreAllocator.cs index 3aaf315a..4969fa43 100644 --- a/ARMeilleure/CodeGen/X86/PreAllocator.cs +++ b/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -114,6 +114,16 @@ namespace ARMeilleure.CodeGen.X86 node = HandleVectorInsert8(block.Operations, node, operation); } break; + + case Instruction.Extended: + IntrinsicOperation intrinOp = (IntrinsicOperation)operation; + + if (intrinOp.Intrinsic == Intrinsic.X86Mxcsrmb || intrinOp.Intrinsic == Intrinsic.X86Mxcsrub) + { + int stackOffset = stackAlloc.Allocate(OperandType.I32); + operation.SetSources(new Operand[] { Const(stackOffset), operation.GetSource(0) }); + } + break; } } } diff --git a/ARMeilleure/CodeGen/X86/X86Instruction.cs b/ARMeilleure/CodeGen/X86/X86Instruction.cs index 1c4bbed4..166567ae 100644 --- a/ARMeilleure/CodeGen/X86/X86Instruction.cs +++ b/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -2,6 +2,7 @@ namespace ARMeilleure.CodeGen.X86 { enum X86Instruction { + None, Add, Addpd, Addps, @@ -60,6 +61,7 @@ namespace ARMeilleure.CodeGen.X86 Imul128, Insertps, Jmp, + Ldmxcsr, Lea, Maxpd, Maxps, @@ -186,6 +188,7 @@ namespace ARMeilleure.CodeGen.X86 Sqrtps, Sqrtsd, Sqrtss, + Stmxcsr, Sub, Subpd, Subps, |
