diff options
Diffstat (limited to 'src/ARMeilleure/CodeGen')
59 files changed, 16085 insertions, 0 deletions
// NOTE: this span originally held four new-file diff hunks collapsed onto a few lines.
// They are reproduced below as formatted C#; the using directives of the individual files
// are merged here because C# requires them to precede the first namespace declaration.
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

// File: src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Arm64-specific IR pass. For every basic block it (1) moves Add constants that cannot be
    /// used as an immediate into locals and (2) folds address computations feeding Load/Store
    /// operations into memory operands.
    /// </summary>
    static class Arm64Optimizer
    {
        // Upper bound on how many times a single constant copy is reused before a fresh copy is made.
        private const int MaxConstantUses = 10000;

        /// <summary>
        /// Runs the pass over every block of <paramref name="cfg"/> and then calls
        /// <c>Optimizer.RemoveUnusedNodes</c> on it.
        /// </summary>
        /// <param name="cfg">Control flow graph to rewrite in place.</param>
        public static void RunPass(ControlFlowGraph cfg)
        {
            // Cache of constant copies for the block currently being processed.
            // The key includes the operand type and the relocatable flag in addition to the raw
            // value: the copy is created with Local(source.Type), so reusing it for a constant that
            // only shares the raw value would hand an Add a local of a different type.
            // NOTE(review): keeping relocatable and plain constants apart is a conservative choice;
            // confirm against how relocatable constants are patched.
            var constants = new Dictionary<(ulong Value, OperandType Type, bool Relocatable), Operand>();

            Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
            {
                (ulong Value, OperandType Type, bool Relocatable) key = (source.Value, source.Type, source.Relocatable);

                // If the constant has many uses, we also force a new constant mov to be added, in order
                // to avoid overflow of the counts field (that is limited to 16 bits).
                if (!constants.TryGetValue(key, out var constant) || constant.UsesCount > MaxConstantUses)
                {
                    constant = Local(source.Type);

                    Operation copyOp = Operation(Instruction.Copy, constant, source);

                    // The copy must be placed before its first user.
                    block.Operations.AddBefore(operation, copyOp);

                    constants[key] = constant;
                }

                return constant;
            }

            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
            {
                // Copies are only reused within a single block.
                constants.Clear();

                Operation nextNode;

                for (Operation node = block.Operations.First; node != default; node = nextNode)
                {
                    // Captured up front so the iteration does not depend on edits made to the list below.
                    nextNode = node.ListNext;

                    // Insert copies for Add constants that are relocatable or that ConstTooLong
                    // rejects (i.e. they cannot be used as the immediate of the add).
                    // Doing this early unblocks a few optimizations.
                    if (node.Instruction == Instruction.Add)
                    {
                        Operand src1 = node.GetSource(0);
                        Operand src2 = node.GetSource(1);

                        if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
                        {
                            node.SetSource(0, GetConstantCopy(block, node, src1));
                        }

                        if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
                        {
                            node.SetSource(1, GetConstantCopy(block, node, src2));
                        }
                    }

                    // Try to fold something like:
                    //  lsl x1, x1, #2
                    //  add x0, x0, x1
                    //  ldr x0, [x0]
                    //  add x2, x2, #16
                    //  ldr x2, [x2]
                    // Into:
                    //  ldr x0, [x0, x1, lsl #2]
                    //  ldr x2, [x2, #16]
                    if (IsMemoryLoadOrStore(node.Instruction))
                    {
                        OperandType type;

                        // The access type comes from the destination when there is one,
                        // otherwise from source 1.
                        if (node.Destination != default)
                        {
                            type = node.Destination.Type;
                        }
                        else
                        {
                            type = node.GetSource(1).Type;
                        }

                        Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);

                        if (memOp != default)
                        {
                            node.SetSource(0, memOp);
                        }
                    }
                }
            }

            Optimizer.RemoveUnusedNodes(cfg);
        }

        /// <summary>
        /// Tries to express <paramref name="addr"/> as a memory operand, either base + immediate
        /// or base + (optionally scaled) index.
        /// </summary>
        /// <param name="addr">Address operand of the load/store.</param>
        /// <param name="type">Type of the value being accessed.</param>
        /// <returns>The memory operand, or <c>default</c> when nothing could be folded.</returns>
        private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
        {
            Operand baseOp = addr;

            // First we check if the address is the result of a local X with immediate
            // addition. If that is the case, then the baseOp is X, and the memory operand immediate
            // becomes the addition immediate. Otherwise baseOp keeps being the address.
            int imm = GetConstOp(ref baseOp, type);
            if (imm != 0)
            {
                return MemoryOp(type, baseOp, default, Multiplier.x1, imm);
            }

            // Now we check if the baseOp is the result of a local Y with a local Z addition.
            // If that is the case, we now set baseOp to Y and indexOp to Z. We further check
            // if Z is the result of a left shift of local W by a value == 0 or == Log2(AccessSize),
            // if that is the case, we set indexOp to W and adjust the scale value of the memory operand
            // to match that of the left shift.
            // There is one missed case, which is the address being a shift result, but this is
            // probably not worth optimizing as it should never happen.
            (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);

            // If baseOp is still equal to address, then there's nothing that can be optimized.
            if (baseOp == addr)
            {
                return default;
            }

            return MemoryOp(type, baseOp, indexOp, scale, 0);
        }

        /// <summary>
        /// If <paramref name="baseOp"/> is the single-assignment result of "local + constant" and
        /// the constant passes <see cref="ConstTooLong"/>, replaces <paramref name="baseOp"/> with
        /// the local and returns the constant.
        /// </summary>
        /// <param name="baseOp">Address operand; updated to the local on success.</param>
        /// <param name="accessType">Type of the value being accessed.</param>
        /// <returns>The addition constant, or 0 when nothing was folded.</returns>
        private static int GetConstOp(ref Operand baseOp, OperandType accessType)
        {
            Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);

            if (operation == default)
            {
                return 0;
            }

            Operand src1 = operation.GetSource(0);
            Operand src2 = operation.GetSource(1);

            Operand constOp;
            Operand otherOp;

            // The constant may be on either side of the addition.
            if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
            {
                constOp = src1;
                otherOp = src2;
            }
            else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
            {
                constOp = src2;
                otherOp = src1;
            }
            else
            {
                return 0;
            }

            // If we have addition by a constant that we can't encode on the instruction,
            // then we can't optimize it further.
            if (ConstTooLong(constOp, accessType))
            {
                return 0;
            }

            baseOp = otherOp;

            return constOp.AsInt32();
        }

        /// <summary>
        /// If <paramref name="baseOp"/> is the single-assignment result of "local + local", splits
        /// it into base and index, additionally folding a left shift (by 0 or by the scale of
        /// <paramref name="accessType"/>) that produced one of the two into the returned scale.
        /// </summary>
        /// <param name="baseOp">Address operand; updated to the base local on success.</param>
        /// <param name="accessType">Type of the value being accessed.</param>
        /// <returns>Index operand (<c>default</c> when not folded) and its multiplier.</returns>
        private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
        {
            Operand indexOp = default;

            Multiplier scale = Multiplier.x1;

            Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);

            if (addOp == default)
            {
                return (indexOp, scale);
            }

            Operand src1 = addOp.GetSource(0);
            Operand src2 = addOp.GetSource(1);

            if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
            {
                return (indexOp, scale);
            }

            baseOp = src1;
            indexOp = src2;

            // Look for the shift on src1 first, and only then on src2.
            Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);

            bool indexOnSrc2 = false;

            if (shlOp == default)
            {
                shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);

                indexOnSrc2 = true;
            }

            if (shlOp != default)
            {
                Operand shSrc = shlOp.GetSource(0);
                Operand shift = shlOp.GetSource(1);

                int maxShift = Assembler.GetScaleForType(accessType);

                if (shSrc.Kind == OperandKind.LocalVariable &&
                    shift.Kind == OperandKind.Constant &&
                    (shift.Value == 0 || shift.Value == (ulong)maxShift))
                {
                    scale = shift.Value switch
                    {
                        1 => Multiplier.x2,
                        2 => Multiplier.x4,
                        3 => Multiplier.x8,
                        4 => Multiplier.x16,
                        _ => Multiplier.x1
                    };

                    // The operand that was not the shift result becomes the base.
                    baseOp = indexOnSrc2 ? src1 : src2;
                    indexOp = shSrc;
                }
            }

            return (indexOp, scale);
        }

        /// <summary>
        /// Returns the operation assigning <paramref name="op"/> when it has exactly one assignment
        /// and that assignment is a <paramref name="inst"/>; otherwise <c>default</c>.
        /// </summary>
        private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
        {
            // If we have multiple assignments, folding is not safe
            // as the value may be different depending on the
            // control flow path.
            if (op.AssignmentsCount != 1)
            {
                return default;
            }

            Operation asgOp = op.Assignments[0];

            if (asgOp.Instruction != inst)
            {
                return default;
            }

            return asgOp;
        }

        /// <summary>Returns true for <c>Instruction.Load</c> and <c>Instruction.Store</c>.</summary>
        private static bool IsMemoryLoadOrStore(Instruction inst)
        {
            return inst == Instruction.Load || inst == Instruction.Store;
        }

        /// <summary>
        /// Returns true when <paramref name="constOp"/> does not fit in 32 bits, or when
        /// <c>CodeGenCommon.ConstFitsOnUImm12</c> rejects it for <paramref name="accessType"/>.
        /// </summary>
        private static bool ConstTooLong(Operand constOp, OperandType accessType)
        {
            // Any bits set above bit 31 rule the constant out immediately.
            if ((uint)constOp.Value != constOp.Value)
            {
                return true;
            }

            return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
        }
    }
}

// File: src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Condition codes. The numeric values are significant: the assembler ORs them directly into
    /// instruction encodings (B.cond, CSEL, CSINC), and CSET inverts a condition by flipping bit 0.
    /// </summary>
    enum ArmCondition
    {
        Eq = 0,
        Ne = 1,
        GeUn = 2,
        LtUn = 3,
        Mi = 4,
        Pl = 5,
        Vs = 6,
        Vc = 7,
        GtUn = 8,
        LeUn = 9,
        Ge = 10,
        Lt = 11,
        Gt = 12,
        Le = 13,
        Al = 14,
        Nv = 15
    }

    static class ComparisonArm64Extensions
    {
        /// <summary>Maps an IR <see cref="Comparison"/> to the matching <see cref="ArmCondition"/>.</summary>
        /// <exception cref="ArgumentException">Thrown for a comparison with no mapping.</exception>
        public static ArmCondition ToArmCondition(this Comparison comp)
        {
            return comp switch
            {
                Comparison.Equal => ArmCondition.Eq,
                Comparison.NotEqual => ArmCondition.Ne,
                Comparison.Greater => ArmCondition.Gt,
                Comparison.LessOrEqual => ArmCondition.Le,
                Comparison.GreaterUI => ArmCondition.GtUn,
                Comparison.LessOrEqualUI => ArmCondition.LeUn,
                Comparison.GreaterOrEqual => ArmCondition.Ge,
                Comparison.Less => ArmCondition.Lt,
                Comparison.GreaterOrEqualUI => ArmCondition.GeUn,
                Comparison.LessUI => ArmCondition.LtUn,

                _ => throw new ArgumentException(null, nameof(comp))
            };
        }
    }
}

// File: src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Register extension kinds. The numeric value is shifted into the instruction encoding by the
    /// assembler (bits 13 and up), so the values must not be changed.
    /// </summary>
    enum ArmExtensionType
    {
        Uxtb = 0,
        Uxth = 1,
        Uxtw = 2,
        Uxtx = 3,
        Sxtb = 4,
        Sxth = 5,
        Sxtw = 6,
        Sxtx = 7
    }
}

// File: src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Register shift kinds. The numeric value is shifted into the instruction encoding by the
    /// assembler (bits 22 and up), so the values must not be changed.
    /// </summary>
    enum ArmShiftType
    {
        Lsl = 0,
        Lsr = 1,
        Asr = 2,
        Ror = 3
    }
}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/Assembler.cs b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs new file mode 100644 index 00000000..0ec0be7c --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs @@ -0,0 +1,1160 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Diagnostics; +using System.IO; +using static ARMeilleure.IntermediateRepresentation.Operand; + +namespace ARMeilleure.CodeGen.Arm64 +{ + class Assembler + { + public const uint SfFlag = 1u << 31; + + private const int SpRegister = 31; + private const int ZrRegister = 31; + + private readonly Stream _stream; + + public Assembler(Stream stream) + { + _stream = stream; + } + + public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0) + { + WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount); + } + + public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false) + { + WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm); + } + + public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Asr(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 
63 : 31; + shift &= mask; + Sbfm(rd, rn, shift, mask); + } + else + { + Asrv(rd, rn, rm); + } + } + + public void Asrv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm); + } + + public void B(int imm) + { + WriteUInt32(0x14000000u | EncodeSImm26_2(imm)); + } + + public void B(ArmCondition condition, int imm) + { + WriteUInt32(0x54000000u | (uint)condition | (EncodeSImm19_2(imm) << 5)); + } + + public void Blr(Operand rn) + { + WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5)); + } + + public void Br(Operand rn) + { + WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5)); + } + + public void Brk() + { + WriteUInt32(0xd4200000u); + } + + public void Cbz(Operand rt, int imm) + { + WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt); + } + + public void Cbnz(Operand rt, int imm) + { + WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt); + } + + public void Clrex(int crm = 15) + { + WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8)); + } + + public void Clz(Operand rd, Operand rn) + { + WriteInstructionAuto(0x5ac01000u, rd, rn); + } + + public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true) + { + Debug.Assert((uint)size < 4); + WriteSimdInstruction(0x2e208c00u | ((uint)size << 22), rd, rn, rm, q); + } + + public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Subs(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount); + } + + public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition) + { + WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm); + } + + public void Cset(Operand rd, ArmCondition condition) + { + var zr = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type); + Csinc(rd, zr, zr, (ArmCondition)((int)condition ^ 1)); + } + + public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition) + { + 
WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm); + } + + public void Dmb(uint option) + { + WriteUInt32(0xd50330bfu | (option << 8)); + } + + public void DupScalar(Operand rd, Operand rn, int index, int size) + { + WriteInstruction(0x5e000400u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn); + } + + public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true) + { + WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q); + } + + public void Extr(Operand rd, Operand rn, Operand rm, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionBitwiseAuto(0x13800000u | n | (EncodeUImm6(imms) << 10), rd, rn, rm); + } + + public void FaddScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e202800u, rd, rn, rm); + } + + public void FcvtScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e224000u | (rd.Type == OperandType.FP64 ? 1u << 15 : 1u << 22); + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void FdivScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e201800u, rd, rn, rm); + } + + public void Fmov(Operand rd, Operand rn) + { + WriteFPInstructionAuto(0x1e204000u, rd, rn); + } + + public void Fmov(Operand rd, Operand rn, bool topHalf) + { + Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger()); + Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf); + + uint opcode = rd.Type.IsInteger() ? 0b110u : 0b111u; + + uint rmode = topHalf ? 1u << 19 : 0u; + uint ftype = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64 ? 1u << 22 : 0u; + uint sf = rd.Type == OperandType.I64 || rn.Type == OperandType.I64 ? 
SfFlag : 0u; + + WriteUInt32(0x1e260000u | (opcode << 16) | rmode | ftype | sf | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void FmulScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e200800u, rd, rn, rm); + } + + public void FnegScalar(Operand rd, Operand rn) + { + WriteFPInstructionAuto(0x1e214000u, rd, rn); + } + + public void FsubScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e203800u, rd, rn, rm); + } + + public void Ins(Operand rd, Operand rn, int index, int size) + { + WriteInstruction(0x4e001c00u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn); + } + + public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size) + { + uint imm4 = (uint)srcIndex << size; + Debug.Assert((uint)srcIndex < (16u >> size)); + WriteInstruction(0x6e000400u | (imm4 << 11) | (EncodeIndexSizeImm5(dstIndex, size) << 16), rd, rn); + } + + public void Ldaxp(Operand rt, Operand rt2, Operand rn) + { + WriteInstruction(0x887f8000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rt2); + } + + public void Ldaxr(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u | ((rt.Type == OperandType.I64 ? 
3u : 2u) << 30), rt, rn); + } + + public void Ldaxrb(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u, rt, rn); + } + + public void Ldaxrh(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u | (1u << 30), rt, rn); + } + + public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void Ldr(Operand rt, Operand rn) + { + if (rn.Kind == OperandKind.Memory) + { + MemoryOperand memOp = rn.GetMemory(); + + if (memOp.Index != default) + { + Debug.Assert(memOp.Displacement == 0); + Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type)); + LdrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1); + } + else + { + LdrRiUn(rt, memOp.BaseAddress, memOp.Displacement); + } + } + else + { + LdrRiUn(rt, rn, 0); + } + } + + public void LdrLit(Operand rt, int offset) + { + uint instruction = 0x18000000u | (EncodeSImm19_2(offset) << 5); + + if (rt.Type == OperandType.I64) + { + instruction |= 1u << 30; + } + + WriteInstruction(instruction, rt); + } + + public void LdrRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type) | (EncodeSImm9(imm) << 12); + 
WriteInstruction(instruction, rt, rn); + } + + public void LdrRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift) + { + uint instruction = GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type); + WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift); + } + + public void LdrbRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38400400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrbRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38400c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrbRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x39400000u | (EncodeUImm12(imm, 0) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78400400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78400c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x79400000u | (EncodeUImm12(imm, 1) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void Ldur(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void Lsl(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 
63 : 31; + shift &= mask; + Ubfm(rd, rn, -shift & mask, mask - shift); + } + else + { + Lslv(rd, rn, rm); + } + } + + public void Lslv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm); + } + + public void Lsr(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 63 : 31; + shift &= mask; + Ubfm(rd, rn, shift, mask); + } + else + { + Lsrv(rd, rn, rm); + } + } + + public void Lsrv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm); + } + + public void Madd(Operand rd, Operand rn, Operand rm, Operand ra) + { + WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra); + } + + public void Mul(Operand rd, Operand rn, Operand rm) + { + Madd(rd, rn, rm, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type)); + } + + public void Mov(Operand rd, Operand rn) + { + if (rd.Type.IsInteger()) + { + Orr(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn); + } + else + { + OrrVector(rd, rn, rn); + } + } + + public void MovSp(Operand rd, Operand rn) + { + if (rd.GetRegister().Index == SpRegister || + rn.GetRegister().Index == SpRegister) + { + Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true); + } + else + { + Mov(rd, rn); + } + } + + public void Mov(Operand rd, int imm) + { + Movz(rd, imm, 0); + } + + public void Movz(Operand rd, int imm, int hw) + { + Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw); + WriteInstructionAuto(0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd); + } + + public void Movk(Operand rd, int imm, int hw) + { + Debug.Assert((hw & (rd.Type == OperandType.I64 ? 
3 : 1)) == hw); + WriteInstructionAuto(0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd); + } + + public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2) + { + uint instruction = 0xd5300000u; + + instruction |= (op2 & 7) << 5; + instruction |= (crm & 15) << 8; + instruction |= (crn & 15) << 12; + instruction |= (op1 & 7) << 16; + instruction |= (o0 & 1) << 19; + + WriteInstruction(instruction, rt); + } + + public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Orn(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount); + } + + public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Sub(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount); + } + + public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true) + { + WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q); + } + + public void Ret(Operand rn) + { + WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5)); + } + + public void Rev(Operand rd, Operand rn) + { + uint opc0 = rd.Type == OperandType.I64 ? 1u << 10 : 0u; + WriteInstructionAuto(0x5ac00800u | opc0, rd, rn); + } + + public void Ror(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 
63 : 31; + shift &= mask; + Extr(rd, rn, rn, shift); + } + else + { + Rorv(rd, rn, rm); + } + } + + public void Rorv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm); + } + + public void Sbfm(Operand rd, Operand rn, int immr, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionAuto(0x13000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn); + } + + public void ScvtfScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e220000u; + + if (rn.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteFPInstructionAuto(instruction, rd, rn); + } + + public void Sdiv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm); + } + + public void Smulh(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16(0x9b407c00u, rd, rn, rm); + } + + public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs) + { + WriteInstruction(0x88208000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs, rt2); + } + + public void Stlxr(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u | ((rt.Type == OperandType.I64 ? 
3u : 2u) << 30), rt, rn, rs); + } + + public void Stlxrb(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u, rt, rn, rs); + } + + public void Stlxrh(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs); + } + + public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void Str(Operand rt, Operand rn) + { + if (rn.Kind == OperandKind.Memory) + { + MemoryOperand memOp = rn.GetMemory(); + + if (memOp.Index != default) + { + Debug.Assert(memOp.Displacement == 0); + Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type)); + StrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1); + } + else + { + StrRiUn(rt, memOp.BaseAddress, memOp.Displacement); + } + } + else + { + StrRiUn(rt, rn, 0); + } + } + + public void StrRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type) | 
(EncodeUImm12(imm, rt.Type) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift) + { + uint instruction = GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type); + WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift); + } + + public void StrbRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38000400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrbRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38000c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrbRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x39000000u | (EncodeUImm12(imm, 0) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78000400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78000c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x79000000u | (EncodeUImm12(imm, 1) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void Stur(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0) + { + WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount); + } + + public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void 
Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Sxtb(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 7); + } + + public void Sxth(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 15); + } + + public void Sxtw(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 31); + } + + public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Ands(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount); + } + + public void Ubfm(Operand rd, Operand rn, int immr, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionAuto(0x53000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn); + } + + public void UcvtfScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e230000u; + + if (rn.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteFPInstructionAuto(instruction, rd, rn); + } + + public void Udiv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm); + } + + public void Umov(Operand rd, Operand rn, int index, int size) + { + uint q = size == 3 ? 
1u << 30 : 0u; + WriteInstruction(0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16) | q, rd, rn); + } + + public void Umulh(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16(0x9bc07c00u, rd, rn, rm); + } + + public void Uxtb(Operand rd, Operand rn) + { + Ubfm(rd, rn, 0, 7); + } + + public void Uxth(Operand rd, Operand rn) + { + Ubfm(rd, rn, 0, 15); + } + + private void WriteInstructionAuto( + uint instI, + uint instR, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0, + bool immForm = false) + { + if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm)) + { + Debug.Assert(shiftAmount == 0); + int imm = rm.AsInt32(); + Debug.Assert((uint)imm == rm.Value); + if (imm != 0 && (imm & 0xfff) == 0) + { + instI |= 1 << 22; // sh flag + imm >>= 12; + } + WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn); + } + else + { + instR |= EncodeUImm6(shiftAmount) << 10; + instR |= (uint)shiftType << 22; + + WriteInstructionRm16Auto(instR, rd, rn, rm); + } + } + + private void WriteInstructionAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmExtensionType extensionType, + int shiftAmount = 0) + { + Debug.Assert((uint)shiftAmount <= 4); + + instruction |= (uint)shiftAmount << 10; + instruction |= (uint)extensionType << 13; + + WriteInstructionRm16Auto(instruction, rd, rn, rm); + } + + private void WriteInstructionBitwiseAuto( + uint instI, + uint instR, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0) + { + if (rm.Kind == OperandKind.Constant && rm.Value != 0) + { + Debug.Assert(shiftAmount == 0); + bool canEncode = CodeGenCommon.TryEncodeBitMask(rm, out int immN, out int immS, out int immR); + Debug.Assert(canEncode); + uint instruction = instI | ((uint)immS << 10) | ((uint)immR << 16) | ((uint)immN << 22); + + WriteInstructionAuto(instruction, rd, rn); + } + else + { + 
WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount); + } + } + + private void WriteInstructionBitwiseAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + instruction |= EncodeUImm6(shiftAmount) << 10; + instruction |= (uint)shiftType << 22; + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionLdrStrAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmExtensionType extensionType, + bool shift) + { + if (shift) + { + instruction |= 1u << 12; + } + + instruction |= (uint)extensionType << 13; + + if (rd.Type == OperandType.I64) + { + instruction |= 1u << 30; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionAuto(uint instruction, Operand rd) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd); + } + + public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd, rn); + } + + private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd, rn, rm, ra); + } + + public void WriteInstruction(uint instruction, Operand rd) + { + WriteUInt32(instruction | EncodeReg(rd)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 10)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, 
Operand ra) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(ra) << 10) | (EncodeReg(rm) << 16)); + } + + private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn) + { + if (rd.Type == OperandType.FP64) + { + instruction |= 1u << 22; + } + + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm) + { + if (rd.Type == OperandType.FP64) + { + instruction |= 1u << 22; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true) + { + if (q) + { + instruction |= 1u << 30; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16)); + } + + public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm) + { + WriteUInt32(instruction | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16)); + } + + private static uint GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type) + { + uint instruction; + int scale; + + if (type.IsInteger()) + { + instruction = intInst; + + if (type == OperandType.I64) + { + instruction |= SfFlag; + scale = 3; + } + else + { + scale = 2; + } + } + else + { + int opc = type switch + { + OperandType.FP32 => 0, + OperandType.FP64 => 1, + _ => 2 + }; + + instruction = vecInst | ((uint)opc << 30); + scale = 2 + opc; + } + + instruction |= (EncodeSImm7(imm, scale) << 15); + + return instruction; + } + + private static uint GetLdrStrInstruction(uint intInst, uint 
vecInst, OperandType type) + { + uint instruction; + + if (type.IsInteger()) + { + instruction = intInst; + + if (type == OperandType.I64) + { + instruction |= 1 << 30; + } + } + else + { + instruction = vecInst; + + if (type == OperandType.V128) + { + instruction |= 1u << 23; + } + else + { + instruction |= type == OperandType.FP32 ? 2u << 30 : 3u << 30; + } + } + + return instruction; + } + + private static uint EncodeIndexSizeImm5(int index, int size) + { + Debug.Assert((uint)size < 4); + Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination."); + return ((uint)index << (size + 1)) | (1u << size); + } + + private static uint EncodeSImm7(int value, int scale) + { + uint imm = (uint)(value >> scale) & 0x7f; + Debug.Assert(((int)imm << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}."); + return imm; + } + + private static uint EncodeSImm9(int value) + { + uint imm = (uint)value & 0x1ff; + Debug.Assert(((int)imm << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeSImm19_2(int value) + { + uint imm = (uint)(value >> 2) & 0x7ffff; + Debug.Assert(((int)imm << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeSImm26_2(int value) + { + uint imm = (uint)(value >> 2) & 0x3ffffff; + Debug.Assert(((int)imm << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm4(int value) + { + uint imm = (uint)value & 0xf; + Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm6(int value) + { + uint imm = (uint)value & 0x3f; + Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm12(int value, OperandType type) + { + return EncodeUImm12(value, GetScaleForType(type)); + } + + 
// Remaining members of the assembler class whose header lies before this chunk (presumably `Assembler` — confirm).
        /// <summary>
        /// Encodes <paramref name="value"/> as a 12-bit unsigned immediate after shifting it right by <paramref name="scale"/>.
        /// Debug builds assert that shifting the encoded field back left reproduces <paramref name="value"/>.
        /// </summary>
        private static uint EncodeUImm12(int value, int scale)
        {
            uint imm = (uint)(value >> scale) & 0xfff;
            Debug.Assert((int)imm << scale == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
            return imm;
        }

        /// <summary>
        /// Encodes <paramref name="value"/> as a 16-bit unsigned immediate.
        /// Debug builds assert that the value fits without truncation.
        /// </summary>
        private static uint EncodeUImm16(int value)
        {
            uint imm = (uint)value & 0xffff;
            Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
            return imm;
        }

        /// <summary>
        /// Returns the 5-bit register field for <paramref name="reg"/>.
        /// A constant operand with value 0 is encoded as <c>ZrRegister</c>; anything else must be a register operand.
        /// </summary>
        private static uint EncodeReg(Operand reg)
        {
            if (reg.Kind == OperandKind.Constant && reg.Value == 0)
            {
                return ZrRegister;
            }

            // Check the operand kind before reading the register out of it, not after.
            Debug.Assert(reg.Kind == OperandKind.Register);

            uint regIndex = (uint)reg.GetRegister().Index;
            Debug.Assert(regIndex < 32);
            return regIndex;
        }

        /// <summary>
        /// Returns the shift amount used to scale immediates for <paramref name="type"/>:
        /// 2 for I32/FP32, 3 for I64/FP64 and 4 for V128.
        /// </summary>
        /// <exception cref="ArgumentException">The type is none of the above.</exception>
        public static int GetScaleForType(OperandType type)
        {
            return type switch
            {
                OperandType.I32 => 2,
                OperandType.I64 => 3,
                OperandType.FP32 => 2,
                OperandType.FP64 => 3,
                OperandType.V128 => 4,
                _ => throw new ArgumentException($"Invalid type {type}.")
            };
        }

        /// <summary>Writes a signed 16-bit value using <see cref="WriteUInt16"/>.</summary>
        private void WriteInt16(short value)
        {
            WriteUInt16((ushort)value);
        }

        /// <summary>Writes a signed 32-bit value using <see cref="WriteUInt32"/>.</summary>
        private void WriteInt32(int value)
        {
            WriteUInt32((uint)value);
        }

        /// <summary>Writes a single byte to the output stream.</summary>
        private void WriteByte(byte value)
        {
            _stream.WriteByte(value);
        }

        /// <summary>Writes a 16-bit value to the output stream, least significant byte first.</summary>
        private void WriteUInt16(ushort value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
        }

        /// <summary>Writes a 32-bit value to the output stream, least significant byte first.</summary>
        private void WriteUInt32(uint value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
            _stream.WriteByte((byte)(value >> 16));
            _stream.WriteByte((byte)(value >> 24));
        }
    }
}

// --- src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs (new file, mode 100644, index 00000000..fda8d786) ---
using System;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Register bit masks and argument/return register indices used by the Arm64 code generator.
    /// In every mask, bit N stands for register N.
    /// </summary>
    static class CallingConvention
    {
        // All 32 bits set.
        private const int RegistersMask = unchecked((int)0xffffffff);

        // Some of those registers have specific roles and can't be used as general purpose registers.
        // X18 - Reserved for platform specific usage.
        // X29 - Frame pointer.
        // X30 - Return address.
        // X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
        private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);

        /// <summary>Returns the integer registers that may be allocated: all registers minus the reserved ones.</summary>
        public static int GetIntAvailableRegisters()
        {
            return RegistersMask & ~ReservedRegsMask;
        }

        /// <summary>Returns the vector registers that may be allocated (all 32).</summary>
        public static int GetVecAvailableRegisters()
        {
            return RegistersMask;
        }

        /// <summary>Returns the integer registers that are neither callee saved nor reserved.</summary>
        public static int GetIntCallerSavedRegisters()
        {
            return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
        }

        /// <summary>Returns the complement of <see cref="GetFpCalleeSavedRegisters"/>.</summary>
        public static int GetFpCallerSavedRegisters()
        {
            return GetFpCalleeSavedRegisters() ^ RegistersMask;
        }

        /// <summary>Returns the complement of <see cref="GetVecCalleeSavedRegisters"/>.</summary>
        public static int GetVecCallerSavedRegisters()
        {
            return GetVecCalleeSavedRegisters() ^ RegistersMask;
        }

        /// <summary>Returns the callee saved integer registers.</summary>
        public static int GetIntCalleeSavedRegisters()
        {
            return 0x1ff80000; // X19 to X28
        }

        /// <summary>Returns the callee saved floating-point registers.</summary>
        public static int GetFpCalleeSavedRegisters()
        {
            return 0xff00; // D8 to D15
        }

        /// <summary>Returns the callee saved vector registers; the mask is empty.</summary>
        public static int GetVecCalleeSavedRegisters()
        {
            return 0;
        }

        /// <summary>Returns how many arguments of each register class are passed in registers.</summary>
        public static int GetArgumentsOnRegsCount()
        {
            return 8;
        }

        /// <summary>Returns the integer register index that carries argument <paramref name="index"/>.</summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not below <see cref="GetArgumentsOnRegsCount"/>.
        /// </exception>
        public static int GetIntArgumentRegister(int index)
        {
            // The unsigned comparison rejects negative indices as well.
            if ((uint)index < (uint)GetArgumentsOnRegsCount())
            {
                return index;
            }

            throw new ArgumentOutOfRangeException(nameof(index));
        }

        /// <summary>Returns the vector register index that carries argument <paramref name="index"/>.</summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not below <see cref="GetArgumentsOnRegsCount"/>.
        /// </exception>
        public static int GetVecArgumentRegister(int index)
        {
            // The unsigned comparison rejects negative indices as well.
            if ((uint)index < (uint)GetArgumentsOnRegsCount())
            {
                return index;
            }

            throw new ArgumentOutOfRangeException(nameof(index));
        }

        /// <summary>Returns the index of the integer return register.</summary>
        public static int GetIntReturnRegister()
        {
            return 0;
        }

        /// <summary>Returns the index of the second (high) integer return register.</summary>
        public static int GetIntReturnRegisterHigh()
        {
            return 1;
        }

        /// <summary>Returns the index of the vector return register.</summary>
        public static int GetVecReturnRegister()
        {
            return 0;
        }
    }
}
// --- src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs (new file, mode 100644, index 00000000..8d1e597b) ---
using ARMeilleure.IntermediateRepresentation;
using System.Numerics;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Constants and immediate-encoding helpers shared by the Arm64 code generator.
    /// </summary>
    static class CodeGenCommon
    {
        public const int TcAddressRegister = 8;
        public const int ReservedRegister = 17;

        /// <summary>
        /// Checks whether <paramref name="value"/> survives being shifted right by <paramref name="scale"/>,
        /// truncated to a signed 7-bit field and expanded back again.
        /// </summary>
        public static bool ConstFitsOnSImm7(int value, int scale)
            => (((value >> scale) << 25) >> (25 - scale)) == value;

        /// <summary>
        /// Checks whether <paramref name="value"/> survives truncation to a signed 9-bit field.
        /// </summary>
        public static bool ConstFitsOnSImm9(int value)
            => ((value << 23) >> 23) == value;

        /// <summary>
        /// Checks whether <paramref name="value"/> fits in an unscaled unsigned 12-bit field.
        /// </summary>
        public static bool ConstFitsOnUImm12(int value)
            => (value & 0xfff) == value;

        /// <summary>
        /// Checks whether <paramref name="value"/> fits in an unsigned 12-bit field after being scaled
        /// by the shift that <c>Assembler.GetScaleForType</c> returns for <paramref name="type"/>.
        /// </summary>
        public static bool ConstFitsOnUImm12(int value, OperandType type)
        {
            int shift = Assembler.GetScaleForType(type);
            int scaled = (value >> shift) & 0xfff;

            return (scaled << shift) == value;
        }

        /// <summary>
        /// Tries to encode the value of <paramref name="operand"/> as a bit mask immediate, using the operand type.
        /// </summary>
        public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
            => TryEncodeBitMask(operand.Type, operand.Value, out immN, out immS, out immR);

        /// <summary>
        /// Tries to encode <paramref name="value"/> as a bit mask immediate.
        /// For I32 the value is first OR-ed with a copy of itself shifted into the upper 32 bits.
        /// </summary>
        public static bool TryEncodeBitMask(OperandType type, ulong value, out int immN, out int immS, out int immR)
        {
            ulong pattern = type == OperandType.I32 ? value | (value << 32) : value;

            return TryEncodeBitMask(pattern, out immN, out immS, out immR);
        }

        /// <summary>
        /// Tries to encode a 64-bit pattern as a bit mask immediate (N, imms, immr).
        /// </summary>
        /// <returns>True when encodable; on failure all three outputs are 0.</returns>
        public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
        {
            // Outputs for every failure path.
            immN = 0;
            immS = 0;
            immR = 0;

            // Two special values can't be encoded:
            // 0, because 1 is subtracted from the ones count below (which would become negative).
            // All bits set, because it is reserved according to the spec:
            //   AND with all ones is a no-op,
            //   OR with all ones is all ones, so one can just use MOV,
            //   XOR with all ones is the inverse, so one can just use MVN.
            if (value == 0 || value == ulong.MaxValue)
            {
                return false;
            }

            // Rotate so that the LSB is 1; this gives a complete element that is not
            // cut in half across the word boundary.
            int rotation = BitOperations.TrailingZeroCount(value & (value + 1));
            ulong normalized = ulong.RotateRight(value, rotation);

            // With a complete element starting at bit 0, measure its size and its run of ones.
            int elementSize = BitOperations.TrailingZeroCount(normalized & (normalized + 1));
            int onesCount = BitOperations.TrailingZeroCount(~normalized);

            // The value must repeat with that period; this also ensures the size is a power of two.
            if (ulong.RotateRight(value, elementSize) != value)
            {
                return false;
            }

            immN = (elementSize >> 6) & 1;
            immS = ((-elementSize << 1) | (onesCount - 1)) & 0x3f;
            immR = (elementSize - rotation) & (elementSize - 1);

            return true;
        }
    }
}
// --- src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs (new file, mode 100644, index 00000000..0dd5355f) ---
using ARMeilleure.CodeGen.Linking;
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using Ryujinx.Common.Memory;
using System;
using System.Collections.Generic;
using System.IO;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Holds the state used while emitting Arm64 code for one function: the output stream, the assembler
    /// writing to it, branches waiting for their target block, and a pool of 64-bit constants that is
    /// appended after the code.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the stream comes from <c>MemoryStreamManager.Shared</c> and nothing visible in this class
    /// disposes it — confirm whether it should be returned to the pool.
    /// </remarks>
    class CodeGenContext
    {
        // Number of bytes reserved for each instruction that is patched in later.
        private const int BccInstLength = 4;
        private const int CbnzInstLength = 4;
        private const int LdrLitInstLength = 4;

        private readonly Stream _stream;

        /// <summary>Current length of the output stream.</summary>
        public int StreamOffset => (int)_stream.Length;

        public AllocationResult AllocResult { get; }

        public Assembler Assembler { get; }

        /// <summary>Block most recently passed to <see cref="EnterBlock"/>.</summary>
        public BasicBlock CurrBlock { get; private set; }

        /// <summary>True when the constructor received a non-negative <c>maxCallArgs</c>.</summary>
        public bool HasCall { get; }

        /// <summary>16 bytes per call argument (<c>maxCallArgs * 16</c>, with negative counts treated as 0).</summary>
        public int CallArgsRegionSize { get; }

        /// <summary>16 when <see cref="HasCall"/> is true, otherwise 0.</summary>
        public int FpLrSaveRegionSize { get; }

        // Stream position at which each already entered block starts.
        private readonly Dictionary<BasicBlock, long> _visitedBlocks;

        // Branch slots reserved for blocks that have not been entered yet.
        private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;

        private struct ConstantPoolEntry
        {
            // Byte offset of the constant relative to the start of the pool.
            public readonly int Offset;
            public readonly Symbol Symbol;

            // Destination register and stream position of every literal load that reads this constant.
            public readonly List<(Operand, int)> LdrOffsets;

            public ConstantPoolEntry(int offset, Symbol symbol)
            {
                Offset = offset;
                Symbol = symbol;
                LdrOffsets = new List<(Operand, int)>();
            }
        }

        private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;

        private bool _constantPoolWritten;
        private long _constantPoolOffset;

        private ArmCondition _jNearCondition;
        private Operand _jNearValue;

        private long _jNearPosition;

        private readonly bool _relocatable;

        /// <summary>
        /// Creates a context that writes to a new stream.
        /// </summary>
        /// <param name="allocResult">Register allocation result, exposed through <see cref="AllocResult"/>.</param>
        /// <param name="maxCallArgs">Largest call argument count; a negative value means there are no calls.</param>
        /// <param name="blocksCount">Not used by this constructor.</param>
        /// <param name="relocatable">Whether <see cref="GetCode"/> should produce relocation entries.</param>
        public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
        {
            _stream = MemoryStreamManager.Shared.GetStream();

            AllocResult = allocResult;

            Assembler = new Assembler(_stream);

            bool hasCall = maxCallArgs >= 0;

            HasCall = hasCall;

            if (maxCallArgs < 0)
            {
                maxCallArgs = 0;
            }

            CallArgsRegionSize = maxCallArgs * 16;
            FpLrSaveRegionSize = hasCall ? 16 : 0;

            _visitedBlocks = new Dictionary<BasicBlock, long>();
            _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
            _constantPool = new Dictionary<ulong, ConstantPoolEntry>();

            _relocatable = relocatable;
        }

        /// <summary>
        /// Marks the current stream position as the start of <paramref name="block"/> and fills in every
        /// branch slot that was reserved for it.
        /// </summary>
        public void EnterBlock(BasicBlock block)
        {
            CurrBlock = block;

            long target = _stream.Position;

            if (_pendingBranches.TryGetValue(block, out var list))
            {
                foreach (var tuple in list)
                {
                    _stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
                    WriteBranch(tuple.Condition, target);
                }

                // Go back to where emission continues.
                _stream.Seek(target, SeekOrigin.Begin);
                _pendingBranches.Remove(block);
            }

            _visitedBlocks.Add(block, target);
        }

        /// <summary>Emits an unconditional branch to <paramref name="target"/>.</summary>
        public void JumpTo(BasicBlock target)
        {
            JumpTo(ArmCondition.Al, target);
        }

        /// <summary>
        /// Emits a branch to <paramref name="target"/>. If the block has not been entered yet, the position is
        /// recorded and space for one instruction is skipped, to be filled in by <see cref="EnterBlock"/>.
        /// </summary>
        public void JumpTo(ArmCondition condition, BasicBlock target)
        {
            if (_visitedBlocks.TryGetValue(target, out long offset))
            {
                WriteBranch(condition, offset);
            }
            else
            {
                if (!_pendingBranches.TryGetValue(target, out var list))
                {
                    list = new List<(ArmCondition, long)>();
                    _pendingBranches.Add(target, list);
                }

                list.Add((condition, _stream.Position));

                _stream.Seek(BccInstLength, SeekOrigin.Current);
            }
        }

        // Writes a branch at the current position whose displacement is relative to that position.
        private void WriteBranch(ArmCondition condition, long to)
        {
            int imm = checked((int)(to - _stream.Position));

            if (condition != ArmCondition.Al)
            {
                Assembler.B(condition, imm);
            }
            else
            {
                Assembler.B(imm);
            }
        }

        /// <summary>
        /// Reserves space for a conditional branch whose target is set by the next <see cref="JumpHere"/> call.
        /// </summary>
        public void JumpToNear(ArmCondition condition)
        {
            _jNearCondition = condition;
            _jNearPosition = _stream.Position;

            _stream.Seek(BccInstLength, SeekOrigin.Current);
        }

        /// <summary>
        /// Reserves space for a CBNZ on <paramref name="value"/> whose target is set by the next
        /// <see cref="JumpHere"/> call.
        /// </summary>
        public void JumpToNearIfNotZero(Operand value)
        {
            _jNearValue = value;
            _jNearPosition = _stream.Position;

            _stream.Seek(CbnzInstLength, SeekOrigin.Current);
        }

        /// <summary>
        /// Fills in the slot reserved by <see cref="JumpToNear"/> or <see cref="JumpToNearIfNotZero"/> so that
        /// it branches to the current position, then restores the stream position.
        /// </summary>
        public void JumpHere()
        {
            long currentPosition = _stream.Position;
            long offset = currentPosition - _jNearPosition;

            _stream.Seek(_jNearPosition, SeekOrigin.Begin);

            if (_jNearValue != default)
            {
                Assembler.Cbnz(_jNearValue, checked((int)offset));
                _jNearValue = default;
            }
            else
            {
                Assembler.B(_jNearCondition, checked((int)offset));
            }

            _stream.Seek(currentPosition, SeekOrigin.Begin);
        }

        /// <summary>
        /// Registers <paramref name="value"/> in the constant pool (entries are keyed by value, so an existing
        /// entry is reused) and reserves space for the literal load into <paramref name="rt"/>.
        /// </summary>
        public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
        {
            if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
            {
                cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
                _constantPool.Add(value, cpe);
            }

            // cpe is a struct copy, but LdrOffsets refers to the same list as the stored entry.
            cpe.LdrOffsets.Add((rt, checked((int)_stream.Position)));
            _stream.Seek(LdrLitInstLength, SeekOrigin.Current);
        }

        // Appends the constant pool at the current position, patches every reserved literal load and
        // returns the pool start. Later calls return the same offset without writing anything.
        private long WriteConstantPool()
        {
            if (_constantPoolWritten)
            {
                return _constantPoolOffset;
            }

            long constantPoolBaseOffset = _stream.Position;

            // Place every value in the slot given by its recorded offset. The literal loads below use
            // that offset, and the enumeration order of a Dictionary is not guaranteed to match the
            // insertion order.
            ulong[] pool = new ulong[_constantPool.Count];

            foreach ((ulong value, ConstantPoolEntry cpe) in _constantPool)
            {
                pool[cpe.Offset / sizeof(ulong)] = value;
            }

            foreach (ulong value in pool)
            {
                WriteUInt64(value);
            }

            foreach (ConstantPoolEntry cpe in _constantPool.Values)
            {
                foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
                {
                    _stream.Seek(ldrOffset, SeekOrigin.Begin);

                    int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
                    int pcRelativeOffset = absoluteOffset - ldrOffset;

                    Assembler.LdrLit(rt, pcRelativeOffset);
                }
            }

            // Leave the stream positioned right after the pool.
            _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);

            _constantPoolOffset = constantPoolBaseOffset;
            _constantPoolWritten = true;

            return constantPoolBaseOffset;
        }

        /// <summary>
        /// Writes the constant pool and returns a copy of the whole stream together with the relocation
        /// entries. Entries are produced only for a relocatable context, and only for pool constants whose
        /// symbol type is not <c>SymbolType.None</c>.
        /// </summary>
        /// <exception cref="EndOfStreamException">The stream ended before its reported length was read.</exception>
        public (byte[], RelocInfo) GetCode()
        {
            long constantPoolBaseOffset = WriteConstantPool();

            byte[] code = new byte[_stream.Length];

            long originalPosition = _stream.Position;

            _stream.Seek(0, SeekOrigin.Begin);

            // Stream.Read may return fewer bytes than requested, so keep reading until the buffer is full.
            int totalRead = 0;

            while (totalRead < code.Length)
            {
                int read = _stream.Read(code, totalRead, code.Length - totalRead);

                if (read == 0)
                {
                    throw new EndOfStreamException("The code stream ended before all bytes were read.");
                }

                totalRead += read;
            }

            _stream.Seek(originalPosition, SeekOrigin.Begin);

            RelocInfo relocInfo;

            if (_relocatable)
            {
                RelocEntry[] relocs = new RelocEntry[_constantPool.Count];

                int index = 0;

                foreach (ConstantPoolEntry cpe in _constantPool.Values)
                {
                    if (cpe.Symbol.Type != SymbolType.None)
                    {
                        int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
                        relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
                    }
                }

                // Trim the slots left over by constants that have no symbol.
                if (index != relocs.Length)
                {
                    Array.Resize(ref relocs, index);
                }

                relocInfo = new RelocInfo(relocs);
            }
            else
            {
                relocInfo = new RelocInfo(Array.Empty<RelocEntry>());
            }

            return (code, relocInfo);
        }

        // Writes a 64-bit value, least significant byte first.
        private void WriteUInt64(ulong value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
            _stream.WriteByte((byte)(value >> 16));
            _stream.WriteByte((byte)(value >> 24));
            _stream.WriteByte((byte)(value >> 32));
            _stream.WriteByte((byte)(value >> 40));
            _stream.WriteByte((byte)(value >> 48));
            _stream.WriteByte((byte)(value >> 56));
        }
    }
}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs new file mode 100644 index 00000000..fc4fa976 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs @@ -0,0 +1,1580 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Common; +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Numerics; + +using static ARMeilleure.IntermediateRepresentation.Operand; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class CodeGenerator + { + private const int DWordScale = 3; + + private const int RegistersCount = 32; + + private const int FpRegister = 29; + private const int LrRegister = 30; + private const int SpRegister = 31; + private const int ZrRegister = 31; + + private enum AccessSize + { + Byte, + Hword, + Auto + } + + private static Action<CodeGenContext, Operation>[] _instTable; + + static CodeGenerator() + { + _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))]; + + Add(Instruction.Add, GenerateAdd); + Add(Instruction.BitwiseAnd, GenerateBitwiseAnd); + Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr); + Add(Instruction.BitwiseNot, GenerateBitwiseNot); + Add(Instruction.BitwiseOr, GenerateBitwiseOr); + Add(Instruction.BranchIf, GenerateBranchIf); + Add(Instruction.ByteSwap, GenerateByteSwap); + Add(Instruction.Call, GenerateCall); + //Add(Instruction.Clobber, GenerateClobber); + Add(Instruction.Compare, GenerateCompare); + Add(Instruction.CompareAndSwap, GenerateCompareAndSwap); + Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16); + 
Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8); + Add(Instruction.ConditionalSelect, GenerateConditionalSelect); + Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32); + Add(Instruction.ConvertToFP, GenerateConvertToFP); + Add(Instruction.ConvertToFPUI, GenerateConvertToFPUI); + Add(Instruction.Copy, GenerateCopy); + Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros); + Add(Instruction.Divide, GenerateDivide); + Add(Instruction.DivideUI, GenerateDivideUI); + Add(Instruction.Fill, GenerateFill); + Add(Instruction.Load, GenerateLoad); + Add(Instruction.Load16, GenerateLoad16); + Add(Instruction.Load8, GenerateLoad8); + Add(Instruction.MemoryBarrier, GenerateMemoryBarrier); + Add(Instruction.Multiply, GenerateMultiply); + Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI); + Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI); + Add(Instruction.Negate, GenerateNegate); + Add(Instruction.Return, GenerateReturn); + Add(Instruction.RotateRight, GenerateRotateRight); + Add(Instruction.ShiftLeft, GenerateShiftLeft); + Add(Instruction.ShiftRightSI, GenerateShiftRightSI); + Add(Instruction.ShiftRightUI, GenerateShiftRightUI); + Add(Instruction.SignExtend16, GenerateSignExtend16); + Add(Instruction.SignExtend32, GenerateSignExtend32); + Add(Instruction.SignExtend8, GenerateSignExtend8); + Add(Instruction.Spill, GenerateSpill); + Add(Instruction.SpillArg, GenerateSpillArg); + Add(Instruction.StackAlloc, GenerateStackAlloc); + Add(Instruction.Store, GenerateStore); + Add(Instruction.Store16, GenerateStore16); + Add(Instruction.Store8, GenerateStore8); + Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.Tailcall, GenerateTailcall); + Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar); + Add(Instruction.VectorExtract, GenerateVectorExtract); + Add(Instruction.VectorExtract16, GenerateVectorExtract16); + Add(Instruction.VectorExtract8, GenerateVectorExtract8); + Add(Instruction.VectorInsert, 
GenerateVectorInsert); + Add(Instruction.VectorInsert16, GenerateVectorInsert16); + Add(Instruction.VectorInsert8, GenerateVectorInsert8); + Add(Instruction.VectorOne, GenerateVectorOne); + Add(Instruction.VectorZero, GenerateVectorZero); + Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64); + Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96); + Add(Instruction.ZeroExtend16, GenerateZeroExtend16); + Add(Instruction.ZeroExtend32, GenerateZeroExtend32); + Add(Instruction.ZeroExtend8, GenerateZeroExtend8); + + static void Add(Instruction inst, Action<CodeGenContext, Operation> func) + { + _instTable[(int)inst] = func; + } + } + + public static CompiledFunction Generate(CompilerContext cctx) + { + ControlFlowGraph cfg = cctx.Cfg; + + Logger.StartPass(PassName.Optimization); + + if (cctx.Options.HasFlag(CompilerOptions.Optimize)) + { + if (cctx.Options.HasFlag(CompilerOptions.SsaForm)) + { + Optimizer.RunPass(cfg); + } + + BlockPlacement.RunPass(cfg); + } + + Arm64Optimizer.RunPass(cfg); + + Logger.EndPass(PassName.Optimization, cfg); + + Logger.StartPass(PassName.PreAllocation); + + StackAllocator stackAlloc = new(); + + PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs); + + Logger.EndPass(PassName.PreAllocation, cfg); + + Logger.StartPass(PassName.RegisterAllocation); + + if (cctx.Options.HasFlag(CompilerOptions.SsaForm)) + { + Ssa.Deconstruct(cfg); + } + + IRegisterAllocator regAlloc; + + if (cctx.Options.HasFlag(CompilerOptions.Lsra)) + { + regAlloc = new LinearScanAllocator(); + } + else + { + regAlloc = new HybridAllocator(); + } + + RegisterMasks regMasks = new( + CallingConvention.GetIntAvailableRegisters(), + CallingConvention.GetVecAvailableRegisters(), + CallingConvention.GetIntCallerSavedRegisters(), + CallingConvention.GetVecCallerSavedRegisters(), + CallingConvention.GetIntCalleeSavedRegisters(), + CallingConvention.GetVecCalleeSavedRegisters(), + RegistersCount); + + AllocationResult allocResult = regAlloc.RunPass(cfg, 
stackAlloc, regMasks); + + Logger.EndPass(PassName.RegisterAllocation, cfg); + + Logger.StartPass(PassName.CodeGeneration); + + //Console.Error.WriteLine(IRDumper.GetDump(cfg)); + + bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0; + + CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable); + + UnwindInfo unwindInfo = WritePrologue(context); + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + context.EnterBlock(block); + + for (Operation node = block.Operations.First; node != default;) + { + node = GenerateOperation(context, node); + } + + if (block.SuccessorsCount == 0) + { + // The only blocks which can have 0 successors are exit blocks. + Operation last = block.Operations.Last; + + Debug.Assert(last.Instruction == Instruction.Tailcall || + last.Instruction == Instruction.Return); + } + else + { + BasicBlock succ = block.GetSuccessor(0); + + if (succ != block.ListNext) + { + context.JumpTo(succ); + } + } + } + + (byte[] code, RelocInfo relocInfo) = context.GetCode(); + + Logger.EndPass(PassName.CodeGeneration); + + return new CompiledFunction(code, unwindInfo, relocInfo); + } + + private static Operation GenerateOperation(CodeGenContext context, Operation operation) + { + if (operation.Instruction == Instruction.Extended) + { + CodeGeneratorIntrinsic.GenerateOperation(context, operation); + } + else + { + if (IsLoadOrStore(operation) && + operation.ListNext != default && + operation.ListNext.Instruction == operation.Instruction && + TryPairMemoryOp(context, operation, operation.ListNext)) + { + // Skip next operation if we managed to pair them. 
+ return operation.ListNext.ListNext; + } + + Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction]; + + if (func != null) + { + func(context, operation); + } + else + { + throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\"."); + } + } + + return operation.ListNext; + } + + private static void GenerateAdd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + // ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Add(dest, src1, src2); + } + else + { + context.Assembler.FaddScalar(dest, src1, src2); + } + } + + private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.And(dest, src1, src2); + } + + private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Eor(dest, src1, src2); + } + else + { + context.Assembler.EorVector(dest, src1, src2); + } + } + + private static void GenerateBitwiseNot(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Mvn(dest, source); + } + + private static void GenerateBitwiseOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + 
ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Orr(dest, src1, src2); + } + + private static void GenerateBranchIf(CodeGenContext context, Operation operation) + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var cond = ((Comparison)comp.AsInt32()).ToArmCondition(); + + GenerateCompareCommon(context, operation); + + context.JumpTo(cond, context.CurrBlock.GetSuccessor(1)); + } + + private static void GenerateByteSwap(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Rev(dest, source); + } + + private static void GenerateCall(CodeGenContext context, Operation operation) + { + context.Assembler.Blr(operation.GetSource(0)); + } + + private static void GenerateCompare(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand comp = operation.GetSource(2); + + Debug.Assert(dest.Type == OperandType.I32); + Debug.Assert(comp.Kind == OperandKind.Constant); + + var cond = ((Comparison)comp.AsInt32()).ToArmCondition(); + + GenerateCompareCommon(context, operation); + + context.Assembler.Cset(dest, cond); + } + + private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation) + { + if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3. 
+ { + Operand actualLow = operation.GetDestination(0); + Operand actualHigh = operation.GetDestination(1); + Operand temp0 = operation.GetDestination(2); + Operand temp1 = operation.GetDestination(3); + Operand address = operation.GetSource(0); + Operand expectedLow = operation.GetSource(1); + Operand expectedHigh = operation.GetSource(2); + Operand desiredLow = operation.GetSource(3); + Operand desiredHigh = operation.GetSource(4); + + GenerateAtomicDcas( + context, + address, + expectedLow, + expectedHigh, + desiredLow, + desiredHigh, + actualLow, + actualHigh, + temp0, + temp1); + } + else + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto); + } + } + + private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation) + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Hword); + } + + private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation) + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Byte); + } + + private static void GenerateCompareCommon(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(src1, src2); + + 
Debug.Assert(src1.Type.IsInteger()); + + context.Assembler.Cmp(src1, src2); + } + + private static void GenerateConditionalSelect(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src2, src3); + + Debug.Assert(dest.Type.IsInteger()); + Debug.Assert(src1.Type == OperandType.I32); + + context.Assembler.Cmp (src1, Const(src1.Type, 0)); + context.Assembler.Csel(dest, src2, src3, ArmCondition.Ne); + } + + private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64); + + context.Assembler.Mov(dest, Register(source, OperandType.I32)); + } + + private static void GenerateConvertToFP(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + Debug.Assert(dest.Type != source.Type); + Debug.Assert(source.Type != OperandType.V128); + + if (source.Type.IsInteger()) + { + context.Assembler.ScvtfScalar(dest, source); + } + else + { + context.Assembler.FcvtScalar(dest, source); + } + } + + private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + Debug.Assert(dest.Type != source.Type); + Debug.Assert(source.Type.IsInteger()); + + context.Assembler.UcvtfScalar(dest, source); + } + + private static void GenerateCopy(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + 
EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant); + + // Moves to the same register are useless. + if (dest.Kind == source.Kind && dest.Value == source.Value) + { + return; + } + + if (dest.Kind == OperandKind.Register && source.Kind == OperandKind.Constant) + { + if (source.Relocatable) + { + context.ReserveRelocatableConstant(dest, source.Symbol, source.Value); + } + else + { + GenerateConstantCopy(context, dest, source.Value); + } + } + else + { + context.Assembler.Mov(dest, source); + } + } + + private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Clz(dest, source); + } + + private static void GenerateDivide(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + ValidateBinOp(dest, dividend, divisor); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sdiv(dest, dividend, divisor); + } + else + { + context.Assembler.FdivScalar(dest, dividend, divisor); + } + } + + private static void GenerateDivideUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + ValidateBinOp(dest, dividend, divisor); + + context.Assembler.Udiv(dest, dividend, divisor); + } + + private static void GenerateLoad(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); + + context.Assembler.Ldr(value, address); + } + + private static void GenerateLoad16(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); 
+ + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.LdrhRiUn(value, address, 0); + } + + private static void GenerateLoad8(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.LdrbRiUn(value, address, 0); + } + + private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation) + { + context.Assembler.Dmb(0xf); + } + + private static void GenerateMultiply(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Mul(dest, src1, src2); + } + else + { + context.Assembler.FmulScalar(dest, src1, src2); + } + } + + private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + Debug.Assert(dest.Type == OperandType.I64); + + context.Assembler.Smulh(dest, src1, src2); + } + + private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + Debug.Assert(dest.Type == OperandType.I64); + + context.Assembler.Umulh(dest, src1, src2); + } + + private static void GenerateNegate(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + if (dest.Type.IsInteger()) + { + context.Assembler.Neg(dest, source); + } + else + { + context.Assembler.FnegScalar(dest, source); + } + } + + private static void 
GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset) + { + if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type)) + { + context.Assembler.LdrRiUn(value, address, offset); + } + else if (CodeGenCommon.ConstFitsOnSImm9(offset)) + { + context.Assembler.Ldur(value, address, offset); + } + else + { + Operand tempAddress = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempAddress, (ulong)offset); + context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input. + context.Assembler.LdrRiUn(value, tempAddress, 0); + } + } + + private static void GenerateReturn(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Ret(Register(LrRegister)); + } + + private static void GenerateRotateRight(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Ror(dest, src1, src2); + } + + private static void GenerateShiftLeft(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Lsl(dest, src1, src2); + } + + private static void GenerateShiftRightSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Asr(dest, src1, src2); + } + + private static void GenerateShiftRightUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + 
context.Assembler.Lsr(dest, src1, src2); + } + + private static void GenerateSignExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxth(dest, source); + } + + private static void GenerateSignExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxtw(dest, source); + } + + private static void GenerateSignExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxtb(dest, source); + } + + private static void GenerateFill(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize; + + GenerateLoad(context, dest, Register(SpRegister), offs); + } + + private static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset) + { + if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type)) + { + context.Assembler.StrRiUn(value, address, offset); + } + else if (CodeGenCommon.ConstFitsOnSImm9(offset)) + { + context.Assembler.Stur(value, address, offset); + } + else + { + Operand tempAddress = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempAddress, (ulong)offset); + context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input. 
+ context.Assembler.StrRiUn(value, tempAddress, 0); + } + } + + private static void GenerateSpill(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, context.CallArgsRegionSize + context.FpLrSaveRegionSize); + } + + private static void GenerateSpillArg(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, 0); + } + + private static void GenerateStackAlloc(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize; + + context.Assembler.Add(dest, Register(SpRegister), Const(dest.Type, offs)); + } + + private static void GenerateStore(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + context.Assembler.Str(value, address); + } + + private static void GenerateStore16(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.StrhRiUn(value, address, 0); + } + + private static void GenerateStore8(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.StrbRiUn(value, address, 0); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset) + { + Operand offset = operation.GetSource(0); + Operand source = operation.GetSource(1); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + baseOffset; + + GenerateStore(context, source, Register(SpRegister), offs); + } + + private static void GenerateSubtract(CodeGenContext context, Operation operation) + { + 
Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + // ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sub(dest, src1, src2); + } + else + { + context.Assembler.FsubScalar(dest, src1, src2); + } + } + + private static void GenerateTailcall(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Br(operation.GetSource(0)); + } + + private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + if (dest != default) + { + Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger()); + + OperandType destType = source.Type == OperandType.I64 ? OperandType.FP64 : OperandType.FP32; + + context.Assembler.Fmov(Register(dest, destType), source, topHalf: false); + } + } + + private static void GenerateVectorExtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes()); + + if (dest.Type.IsInteger()) + { + context.Assembler.Umov(dest, src1, index, dest.Type == OperandType.I64 ? 3 : 2); + } + else + { + context.Assembler.DupScalar(dest, src1, index, dest.Type == OperandType.FP64 ? 
3 : 2); + } + } + + private static void GenerateVectorExtract16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 8); + + context.Assembler.Umov(dest, src1, index, 1); + } + + private static void GenerateVectorExtract8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 16); + + context.Assembler.Umov(dest, src1, index, 0); + } + + private static void GenerateVectorInsert(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + if (src2.Type.IsInteger()) + { + context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2); + } + else + { + context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 
3 : 2); + } + } + + private static void GenerateVectorInsert16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Ins(dest, src2, index, 1); + } + + private static void GenerateVectorInsert8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Ins(dest, src2, index, 0); + } + + private static void GenerateVectorOne(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.CmeqVector(dest, dest, dest, 2); + } + + private static void GenerateVectorZero(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.EorVector(dest, dest, dest); + } + + private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + context.Assembler.Fmov(Register(dest, OperandType.FP64), Register(source, OperandType.FP64)); + } + + private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand 
source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + context.Assembler.Fmov(Register(dest, OperandType.FP32), Register(source, OperandType.FP32)); + } + + private static void GenerateZeroExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Uxth(dest, source); + } + + private static void GenerateZeroExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + // We can eliminate the move if source is already 32-bit and the registers are the same. + if (dest.Value == source.Value && source.Type == OperandType.I32) + { + return; + } + + context.Assembler.Mov(Register(dest.GetRegister().Index, OperandType.I32), source); + } + + private static void GenerateZeroExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Uxtb(dest, source); + } + + private static UnwindInfo WritePrologue(CodeGenContext context) + { + List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>(); + + Operand rsp = Register(SpRegister); + + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask); + int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask); + + int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8); + + int offset = 0; + + 
WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64); + WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64); + + int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize); + int outArgsSize = context.CallArgsRegionSize; + + if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale)) + { + if (context.HasCall) + { + context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize); + context.Assembler.MovSp(Register(FpRegister), rsp); + } + + if (outArgsSize != 0) + { + context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize)); + } + } + else + { + int frameSize = localSize + outArgsSize; + if (frameSize != 0) + { + if (CodeGenCommon.ConstFitsOnUImm12(frameSize)) + { + context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize)); + } + else + { + Operand tempSize = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempSize, (ulong)frameSize); + context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx); + } + } + + context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize); + + if (outArgsSize != 0) + { + context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize)); + } + else + { + context.Assembler.MovSp(Register(FpRegister), Register(SpRegister)); + } + } + + return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset); + } + + private static void WritePrologueCalleeSavesPreIndexed( + CodeGenContext context, + List<UnwindPushEntry> pushEntries, + ref int mask, + ref int offset, + int calleeSaveRegionSize, + OperandType type) + { + if ((BitOperations.PopCount((uint)mask) & 1) != 0) + { + int reg = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg)); + + mask &= ~(1 << 
reg); + + if (offset != 0) + { + context.Assembler.StrRiUn(Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.StrRiPre(Register(reg, type), Register(SpRegister), -calleeSaveRegionSize); + } + + offset += type.GetSizeInBytes(); + } + + while (mask != 0) + { + int reg = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg)); + + mask &= ~(1 << reg); + + int reg2 = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg2)); + + mask &= ~(1 << reg2); + + if (offset != 0) + { + context.Assembler.StpRiUn(Register(reg, type), Register(reg2, type), Register(SpRegister), offset); + } + else + { + context.Assembler.StpRiPre(Register(reg, type), Register(reg2, type), Register(SpRegister), -calleeSaveRegionSize); + } + + offset += type.GetSizeInBytes() * 2; + } + } + + private static void WriteEpilogue(CodeGenContext context) + { + Operand rsp = Register(SpRegister); + + int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize); + int outArgsSize = context.CallArgsRegionSize; + + if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale)) + { + if (outArgsSize != 0) + { + context.Assembler.Add(rsp, rsp, Const(OperandType.I64, outArgsSize)); + } + + if (context.HasCall) + { + context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, localSize); + } + } + else + { + if (context.HasCall) + { + context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize); + } + + int frameSize = localSize + outArgsSize; + if (frameSize != 0) + { + if (CodeGenCommon.ConstFitsOnUImm12(frameSize)) + { + context.Assembler.Add(rsp, rsp, Const(OperandType.I64, frameSize)); + } + else + { + Operand tempSize = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempSize, (ulong)frameSize); + 
context.Assembler.Add(rsp, rsp, tempSize, ArmExtensionType.Uxtx); + } + } + } + + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask); + int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask); + + int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8; + int calleeSaveRegionSize = Align16(offset); + + WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64); + WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64); + } + + private static void WriteEpilogueCalleeSavesPostIndexed( + CodeGenContext context, + ref int mask, + ref int offset, + int calleeSaveRegionSize, + OperandType type) + { + while (mask != 0) + { + int reg = BitUtils.HighestBitSet(mask); + + mask &= ~(1 << reg); + + if (mask != 0) + { + int reg2 = BitUtils.HighestBitSet(mask); + + mask &= ~(1 << reg2); + + offset -= type.GetSizeInBytes() * 2; + + if (offset != 0) + { + context.Assembler.LdpRiUn(Register(reg2, type), Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.LdpRiPost(Register(reg2, type), Register(reg, type), Register(SpRegister), calleeSaveRegionSize); + } + } + else + { + offset -= type.GetSizeInBytes(); + + if (offset != 0) + { + context.Assembler.LdrRiUn(Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.LdrRiPost(Register(reg, type), Register(SpRegister), calleeSaveRegionSize); + } + } + } + } + + private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value) + { + if (value == 0) + { + context.Assembler.Mov(dest, Register(ZrRegister, dest.Type)); + } + else if (CodeGenCommon.TryEncodeBitMask(dest.Type, value, out _, out _, out _)) + 
{ + context.Assembler.Orr(dest, Register(ZrRegister, dest.Type), Const(dest.Type, (long)value)); + } + else + { + int hw = 0; + bool first = true; + + while (value != 0) + { + int valueLow = (ushort)value; + if (valueLow != 0) + { + if (first) + { + context.Assembler.Movz(dest, valueLow, hw); + first = false; + } + else + { + context.Assembler.Movk(dest, valueLow, hw); + } + } + + hw++; + value >>= 16; + } + } + } + + private static void GenerateAtomicCas( + CodeGenContext context, + Operand address, + Operand expected, + Operand desired, + Operand actual, + Operand result, + AccessSize accessSize) + { + int startOffset = context.StreamOffset; + + switch (accessSize) + { + case AccessSize.Byte: + context.Assembler.Ldaxrb(actual, address); + break; + case AccessSize.Hword: + context.Assembler.Ldaxrh(actual, address); + break; + default: + context.Assembler.Ldaxr(actual, address); + break; + } + + context.Assembler.Cmp(actual, expected); + + context.JumpToNear(ArmCondition.Ne); + + switch (accessSize) + { + case AccessSize.Byte: + context.Assembler.Stlxrb(desired, address, result); + break; + case AccessSize.Hword: + context.Assembler.Stlxrh(desired, address, result); + break; + default: + context.Assembler.Stlxr(desired, address, result); + break; + } + + context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed. 
+ + context.JumpHere(); + + context.Assembler.Clrex(); + } + + private static void GenerateAtomicDcas( + CodeGenContext context, + Operand address, + Operand expectedLow, + Operand expectedHigh, + Operand desiredLow, + Operand desiredHigh, + Operand actualLow, + Operand actualHigh, + Operand temp0, + Operand temp1) + { + int startOffset = context.StreamOffset; + + context.Assembler.Ldaxp(actualLow, actualHigh, address); + context.Assembler.Eor(temp0, actualHigh, expectedHigh); + context.Assembler.Eor(temp1, actualLow, expectedLow); + context.Assembler.Orr(temp0, temp1, temp0); + + context.JumpToNearIfNotZero(temp0); + + Operand result = Register(temp0, OperandType.I32); + + context.Assembler.Stlxp(desiredLow, desiredHigh, address, result); + context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed. + + context.JumpHere(); + + context.Assembler.Clrex(); + } + + private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp) + { + if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset)) + { + return false; + } + + if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset)) + { + return false; + } + + if (op1Base != op2Base) + { + return false; + } + + OperandType valueType = GetMemOpValueType(currentOp); + + if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset) + { + return false; + } + + if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2())) + { + return false; + } + + if (currentOp.Instruction == Instruction.Load) + { + context.Assembler.LdpRiUn(currentOp.Destination, nextOp.Destination, op1Base, op1Offset); + } + else if (currentOp.Instruction == Instruction.Store) + { + context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset); + } + else + { + return false; + } + + return true; + } + + private static bool IsLoadOrStore(Operation operation) + { + 
return operation.Instruction == Instruction.Load || operation.Instruction == Instruction.Store; + } + + private static OperandType GetMemOpValueType(Operation operation) + { + if (operation.Destination != default) + { + return operation.Destination.Type; + } + + return operation.GetSource(1).Type; + } + + private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset) + { + baseAddress = default; + offset = 0; + Operand address = operation.GetSource(0); + + if (address.Kind != OperandKind.Memory) + { + return false; + } + + MemoryOperand memOp = address.GetMemory(); + Operand baseOp = memOp.BaseAddress; + + if (baseOp == default) + { + baseOp = memOp.Index; + + if (baseOp == default || memOp.Scale != Multiplier.x1) + { + return false; + } + } + if (memOp.Index != default) + { + return false; + } + + baseAddress = memOp.BaseAddress; + offset = memOp.Displacement; + + return true; + } + + private static Operand Register(Operand operand, OperandType type = OperandType.I64) + { + return Register(operand.GetRegister().Index, type); + } + + private static Operand Register(int register, OperandType type = OperandType.I64) + { + return Factory.Register(register, RegisterType.Integer, type); + } + + private static int Align16(int value) + { + return (value + 0xf) & ~0xf; + } + + [Conditional("DEBUG")] + private static void ValidateUnOp(Operand dest, Operand source) + { + // Destination and source aren't forced to be equals + // EnsureSameReg (dest, source); + EnsureSameType(dest, source); + } + + [Conditional("DEBUG")] + private static void ValidateBinOp(Operand dest, Operand src1, Operand src2) + { + // Destination and source aren't forced to be equals + // EnsureSameReg (dest, src1); + EnsureSameType(dest, src1, src2); + } + + [Conditional("DEBUG")] + private static void ValidateShift(Operand dest, Operand src1, Operand src2) + { + // Destination and source aren't forced to be equals + // EnsureSameReg (dest, src1); + 
EnsureSameType(dest, src1); + + Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32); + } + + private static void EnsureSameReg(Operand op1, Operand op2) + { + Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory); + Debug.Assert(op1.Kind == op2.Kind); + Debug.Assert(op1.Value == op2.Value); + } + + private static void EnsureSameType(Operand op1, Operand op2) + { + Debug.Assert(op1.Type == op2.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + Debug.Assert(op1.Type == op4.Type); + } + } +}
// File: src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Diagnostics;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Emits the instruction for an IR operation that carries an <see cref="Intrinsic"/>.
    /// The base opcode comes from <see cref="IntrinsicTable"/>; the operand layout is selected
    /// by the <see cref="IntrinsicType"/> stored alongside it.
    /// </summary>
    static class CodeGeneratorIntrinsic
    {
        // Every scalar form passes zero for the "q" argument of the shared helpers.
        private const uint ScalarQ = 0;

        /// <summary>
        /// Dispatches on the intrinsic's <see cref="IntrinsicType"/> and writes the encoded
        /// instruction through <c>context.Assembler</c>.
        /// </summary>
        /// <param name="context">Code generation context that owns the assembler.</param>
        /// <param name="operation">Operation whose intrinsic, destination and sources are encoded.</param>
        /// <exception cref="NotImplementedException">The intrinsic type has no case below.</exception>
        public static void GenerateOperation(CodeGenContext context, Operation operation)
        {
            Intrinsic intrin = operation.Intrinsic;

            // The type and size bits are stripped before the table lookup and decoded separately.
            IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));

            uint q = (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift;
            uint sz = (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift;
            uint inst = info.Inst;

            // Sources are fetched lazily: an operation only has the sources its own case reads.
            Operand Src(int index) => operation.GetSource(index);
            uint Imm(int index) => (uint)operation.GetSource(index).AsInt32();

            switch (info.Type)
            {
                case IntrinsicType.ScalarUnary:
                    GenerateVectorUnary(context, ScalarQ, sz, inst, operation.Destination, Src(0));
                    break;

                case IntrinsicType.ScalarUnaryByElem:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    GenerateVectorUnaryByElem(context, ScalarQ, sz, inst, Imm(1), operation.Destination, Src(0));
                    break;

                case IntrinsicType.ScalarBinary:
                    GenerateVectorBinary(context, ScalarQ, sz, inst, operation.Destination, Src(0), Src(1));
                    break;

                case IntrinsicType.ScalarBinaryFPByElem:
                    Debug.Assert(Src(2).Kind == OperandKind.Constant);
                    GenerateVectorBinaryFPByElem(context, ScalarQ, sz, inst, Imm(2), operation.Destination, Src(0), Src(1));
                    break;

                case IntrinsicType.ScalarBinaryRd:
                    // Only source 1 is encoded; source 0 is not passed to the emitter.
                    GenerateVectorUnary(context, ScalarQ, sz, inst, operation.Destination, Src(1));
                    break;

                case IntrinsicType.ScalarBinaryShl:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    GenerateVectorBinaryShlImm(context, ScalarQ, sz, inst, operation.Destination, Src(0), Imm(1));
                    break;

                case IntrinsicType.ScalarBinaryShr:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    GenerateVectorBinaryShrImm(context, ScalarQ, sz, inst, operation.Destination, Src(0), Imm(1));
                    break;

                case IntrinsicType.ScalarFPCompare:
                    GenerateScalarFPCompare(context, sz, inst, operation.Destination, Src(0), Src(1));
                    break;

                case IntrinsicType.ScalarFPConvFixed:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    // The size value is biased by 2 before being handed to the shift-right encoder.
                    GenerateVectorBinaryShrImm(context, ScalarQ, sz + 2u, inst, operation.Destination, Src(0), Imm(1));
                    break;

                case IntrinsicType.ScalarFPConvFixedGpr:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    GenerateScalarFPConvGpr(context, sz, inst, operation.Destination, Src(0), Imm(1));
                    break;

                case IntrinsicType.ScalarFPConvGpr:
                    GenerateScalarFPConvGpr(context, sz, inst, operation.Destination, Src(0));
                    break;

                case IntrinsicType.ScalarTernary:
                    // Source 0 is passed last (as "ra"); sources 1 and 2 become rn and rm.
                    GenerateScalarTernary(context, sz, inst, operation.Destination, Src(1), Src(2), Src(0));
                    break;

                case IntrinsicType.ScalarTernaryFPRdByElem:
                    Debug.Assert(Src(3).Kind == OperandKind.Constant);
                    GenerateVectorBinaryFPByElem(context, ScalarQ, sz, inst, Imm(3), operation.Destination, Src(1), Src(2));
                    break;

                case IntrinsicType.ScalarTernaryShlRd:
                    Debug.Assert(Src(2).Kind == OperandKind.Constant);
                    GenerateVectorBinaryShlImm(context, ScalarQ, sz, inst, operation.Destination, Src(1), Imm(2));
                    break;

                case IntrinsicType.ScalarTernaryShrRd:
                    Debug.Assert(Src(2).Kind == OperandKind.Constant);
                    GenerateVectorBinaryShrImm(context, ScalarQ, sz, inst, operation.Destination, Src(1), Imm(2));
                    break;

                case IntrinsicType.VectorUnary:
                    GenerateVectorUnary(context, q, sz, inst, operation.Destination, Src(0));
                    break;

                case IntrinsicType.VectorUnaryByElem:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    GenerateVectorUnaryByElem(context, q, sz, inst, Imm(1), operation.Destination, Src(0));
                    break;

                case IntrinsicType.VectorBinary:
                    GenerateVectorBinary(context, q, sz, inst, operation.Destination, Src(0), Src(1));
                    break;

                case IntrinsicType.VectorBinaryBitwise:
                    // Bitwise forms use the overload without a size argument.
                    GenerateVectorBinary(context, q, inst, operation.Destination, Src(0), Src(1));
                    break;

                case IntrinsicType.VectorBinaryByElem:
                    Debug.Assert(Src(2).Kind == OperandKind.Constant);
                    GenerateVectorBinaryByElem(context, q, sz, inst, Imm(2), operation.Destination, Src(0), Src(1));
                    break;

                case IntrinsicType.VectorBinaryFPByElem:
                    Debug.Assert(Src(2).Kind == OperandKind.Constant);
                    GenerateVectorBinaryFPByElem(context, q, sz, inst, Imm(2), operation.Destination, Src(0), Src(1));
                    break;

                case IntrinsicType.VectorBinaryRd:
                    // Only source 1 is encoded; source 0 is not passed to the emitter.
                    GenerateVectorUnary(context, q, sz, inst, operation.Destination, Src(1));
                    break;

                case IntrinsicType.VectorBinaryShl:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    GenerateVectorBinaryShlImm(context, q, sz, inst, operation.Destination, Src(0), Imm(1));
                    break;

                case IntrinsicType.VectorBinaryShr:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    GenerateVectorBinaryShrImm(context, q, sz, inst, operation.Destination, Src(0), Imm(1));
                    break;

                case IntrinsicType.VectorFPConvFixed:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    // The size value is biased by 2 before being handed to the shift-right encoder.
                    GenerateVectorBinaryShrImm(context, q, sz + 2u, inst, operation.Destination, Src(0), Imm(1));
                    break;

                case IntrinsicType.VectorInsertByElem:
                    Debug.Assert(Src(1).Kind == OperandKind.Constant);
                    Debug.Assert(Src(3).Kind == OperandKind.Constant);
                    // Source 3 is the source element index, source 1 the destination element index.
                    GenerateVectorInsertByElem(context, sz, inst, Imm(3), Imm(1), operation.Destination, Src(2));
                    break;

                case IntrinsicType.VectorLookupTable:
                    Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);

                    // All sources except the last must sit in consecutively numbered vector registers.
                    for (int srcIndex = 1; srcIndex < operation.SourcesCount - 1; srcIndex++)
                    {
                        Register current = operation.GetSource(srcIndex).GetRegister();
                        Register previous = operation.GetSource(srcIndex - 1).GetRegister();

                        Debug.Assert(previous.Index + 1 == current.Index && current.Type == RegisterType.Vector);
                    }

                    // (SourcesCount - 2) is merged into the opcode at bit 13; the last source is passed as rm.
                    GenerateVectorBinary(
                        context,
                        q,
                        inst | ((uint)(operation.SourcesCount - 2) << 13),
                        operation.Destination,
                        Src(0),
                        Src(operation.SourcesCount - 1));
                    break;

                case IntrinsicType.VectorTernaryFPRdByElem:
                    Debug.Assert(Src(3).Kind == OperandKind.Constant);
                    GenerateVectorBinaryFPByElem(context, q, sz, inst, Imm(3), operation.Destination, Src(1), Src(2));
                    break;

                case IntrinsicType.VectorTernaryRd:
                    GenerateVectorBinary(context, q, sz, inst, operation.Destination, Src(1), Src(2));
                    break;

                case IntrinsicType.VectorTernaryRdBitwise:
                    GenerateVectorBinary(context, q, inst, operation.Destination, Src(1), Src(2));
                    break;

                case IntrinsicType.VectorTernaryRdByElem:
                    Debug.Assert(Src(3).Kind == OperandKind.Constant);
                    GenerateVectorBinaryByElem(context, q, sz, inst, Imm(3), operation.Destination, Src(1), Src(2));
                    break;

                case IntrinsicType.VectorTernaryShlRd:
                    Debug.Assert(Src(2).Kind == OperandKind.Constant);
                    GenerateVectorBinaryShlImm(context, q, sz, inst, operation.Destination, Src(1), Imm(2));
                    break;

                case IntrinsicType.VectorTernaryShrRd:
                    Debug.Assert(Src(2).Kind == OperandKind.Constant);
                    GenerateVectorBinaryShrImm(context, q, sz, inst, operation.Destination, Src(1), Imm(2));
                    break;

                case IntrinsicType.GetRegister:
                    context.Assembler.WriteInstruction(inst, operation.Destination);
                    break;

                case IntrinsicType.SetRegister:
                    context.Assembler.WriteInstruction(inst, Src(0));
                    break;

                default:
                    throw new NotImplementedException(info.Type.ToString());
            }
        }

        /// <summary>
        /// Emits a scalar floating-point compare followed by an <c>Mrs</c> into <paramref name="dest"/>.
        /// When <paramref name="rm"/> is the constant zero, bit 3 of the opcode is set and
        /// <paramref name="rn"/> is passed in its place.
        /// </summary>
        private static void GenerateScalarFPCompare(
            CodeGenContext context,
            uint sz,
            uint instruction,
            Operand dest,
            Operand rn,
            Operand rm)
        {
            uint encoding = instruction | (sz << 22);

            bool comparesWithZero = rm.Kind == OperandKind.Constant && rm.Value == 0;

            if (comparesWithZero)
            {
                encoding |= 0b1000;
                rm = rn;
            }

            context.Assembler.WriteInstructionRm16NoRet(encoding, rn, rm);

            // NOTE(review): presumably this reads back the flags written by the compare - confirm against Assembler.Mrs.
            context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
        }

        /// <summary>
        /// Emits a conversion between a floating-point register and a general-purpose register.
        /// An integer destination goes through <c>WriteInstructionAuto</c>; otherwise
        /// <c>Assembler.SfFlag</c> is added when the source is 64-bit.
        /// </summary>
        private static void GenerateScalarFPConvGpr(
            CodeGenContext context,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn)
        {
            uint encoding = instruction | (sz << 22);

            if (rd.Type.IsInteger())
            {
                context.Assembler.WriteInstructionAuto(encoding, rd, rn);

                return;
            }

            if (rn.Type == OperandType.I64)
            {
                encoding |= Assembler.SfFlag;
            }

            context.Assembler.WriteInstruction(encoding, rd, rn);
        }

        /// <summary>
        /// Fixed-point variant of the GPR conversion: <c>64 - fBits</c> is placed at bit 10
        /// of the opcode in addition to the size bits.
        /// </summary>
        private static void GenerateScalarFPConvGpr(
            CodeGenContext context,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            uint fBits)
        {
            Debug.Assert(fBits <= 64);

            uint encoding = instruction | (sz << 22) | ((64 - fBits) << 10);

            if (rd.Type.IsInteger())
            {
                Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);

                context.Assembler.WriteInstructionAuto(encoding, rd, rn);

                return;
            }

            if (rn.Type == OperandType.I64)
            {
                encoding |= Assembler.SfFlag;
            }
            else
            {
                Debug.Assert(fBits <= 32);
            }

            context.Assembler.WriteInstruction(encoding, rd, rn);
        }

        /// <summary>Emits a four-operand scalar instruction with the size bits placed at bit 22.</summary>
        private static void GenerateScalarTernary(
            CodeGenContext context,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm,
            Operand ra)
        {
            context.Assembler.WriteInstruction(instruction | (sz << 22), rd, rn, rm, ra);
        }

        /// <summary>Emits a two-operand instruction with <paramref name="q"/> at bit 30 and <paramref name="sz"/> at bit 22.</summary>
        private static void GenerateVectorUnary(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn)
        {
            uint encoding = instruction | (q << 30) | (sz << 22);

            context.Assembler.WriteInstruction(encoding, rd, rn);
        }

        /// <summary>
        /// Emits a two-operand instruction whose element selector is packed into a value placed at bit 16:
        /// the index sits above a single marker bit at position <paramref name="sz"/>.
        /// </summary>
        private static void GenerateVectorUnaryByElem(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            uint srcIndex,
            Operand rd,
            Operand rn)
        {
            int sizeShift = (int)sz;
            uint imm5 = (srcIndex << (sizeShift + 1)) | (1u << sizeShift);

            uint encoding = instruction | (q << 30) | (imm5 << 16);

            context.Assembler.WriteInstruction(encoding, rd, rn);
        }

        /// <summary>Emits a three-operand instruction that has no size bits (only <paramref name="q"/> at bit 30).</summary>
        private static void GenerateVectorBinary(
            CodeGenContext context,
            uint q,
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm)
        {
            context.Assembler.WriteInstructionRm16(instruction | (q << 30), rd, rn, rm);
        }

        /// <summary>Emits a three-operand instruction with <paramref name="q"/> at bit 30 and <paramref name="sz"/> at bit 22.</summary>
        private static void GenerateVectorBinary(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            Operand rm)
        {
            uint encoding = instruction | (q << 30) | (sz << 22);

            context.Assembler.WriteInstructionRm16(encoding, rd, rn, rm);
        }

        /// <summary>
        /// Emits a three-operand by-element instruction. The index bits are scattered differently
        /// depending on whether <paramref name="size"/> equals 2.
        /// </summary>
        private static void GenerateVectorBinaryByElem(
            CodeGenContext context,
            uint q,
            uint size,
            uint instruction,
            uint srcIndex,
            Operand rd,
            Operand rn,
            Operand rm)
        {
            // size == 2: index bit 0 -> bit 21, index bit 1 -> bit 11.
            // otherwise: index bits 0-1 -> bits 20-21, index bit 2 -> bit 11.
            uint indexBits = size == 2
                ? ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10)
                : ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);

            uint encoding = instruction | (q << 30) | (size << 22) | indexBits;

            context.Assembler.WriteInstructionRm16(encoding, rd, rn, rm);
        }

        /// <summary>
        /// Floating-point flavour of the by-element encoder. A non-zero <paramref name="sz"/> uses a
        /// single index bit (bit 11); a zero <paramref name="sz"/> uses two (bits 21 and 11).
        /// </summary>
        private static void GenerateVectorBinaryFPByElem(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            uint srcIndex,
            Operand rd,
            Operand rn,
            Operand rm)
        {
            uint indexBits = sz != 0
                ? (srcIndex & 1) << 11
                : ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);

            uint encoding = instruction | (q << 30) | (sz << 22) | indexBits;

            context.Assembler.WriteInstructionRm16(encoding, rd, rn, rm);
        }

        /// <summary>
        /// Emits a shift-left-by-immediate. The immediate placed at bit 16 is <c>8 &lt;&lt; sz</c>
        /// OR'ed with the masked shift amount.
        /// </summary>
        private static void GenerateVectorBinaryShlImm(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            uint shift)
        {
            uint sizeMarker = 8u << (int)sz;

            // shift is unsigned, so only the upper bound needs checking.
            Debug.Assert(shift < sizeMarker);

            uint imm = sizeMarker | (shift & (0x3fu >> (int)(3 - sz)));

            uint encoding = instruction | (q << 30) | (imm << 16);

            context.Assembler.WriteInstruction(encoding, rd, rn);
        }

        /// <summary>
        /// Emits a shift-right-by-immediate. The immediate placed at bit 16 is <c>8 &lt;&lt; sz</c>
        /// OR'ed with <c>(8 &lt;&lt; sz) - shift</c>.
        /// </summary>
        private static void GenerateVectorBinaryShrImm(
            CodeGenContext context,
            uint q,
            uint sz,
            uint instruction,
            Operand rd,
            Operand rn,
            uint shift)
        {
            uint sizeMarker = 8u << (int)sz;

            Debug.Assert(shift > 0 && shift <= sizeMarker);

            uint imm = sizeMarker | (sizeMarker - shift);

            uint encoding = instruction | (q << 30) | (imm << 16);

            context.Assembler.WriteInstruction(encoding, rd, rn);
        }

        /// <summary>
        /// Emits an element-to-element insert: the source index goes to bit 11 (scaled by
        /// <paramref name="sz"/>) and the destination selector to bit 16.
        /// </summary>
        private static void GenerateVectorInsertByElem(
            CodeGenContext context,
            uint sz,
            uint instruction,
            uint srcIndex,
            uint dstIndex,
            Operand rd,
            Operand rn)
        {
            int sizeShift = (int)sz;

            uint imm4 = srcIndex << sizeShift;
            uint imm5 = (dstIndex << (sizeShift + 1)) | (1u << sizeShift);

            uint encoding = instruction | (imm4 << 11) | (imm5 << 16);

            context.Assembler.WriteInstruction(encoding, rd, rn);
        }
    }
}
// File: src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
using System;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Versioning;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Detects optional Arm64 CPU features once, in the static constructor, and exposes them
    /// as flag sets plus a handful of boolean convenience properties.
    /// </summary>
    static partial class HardwareCapabilities
    {
        static HardwareCapabilities()
        {
            // Nothing is probed unless the runtime reports Arm64 support; every flag set stays at zero.
            if (!ArmBase.Arm64.IsSupported)
            {
                return;
            }

            if (OperatingSystem.IsLinux())
            {
                LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)getauxval(AT_HWCAP);
                LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)getauxval(AT_HWCAP2);
            }

            if (OperatingSystem.IsMacOS())
            {
                // The position of a name in _sysctlNames is the bit it sets in MacOsFeatureFlags.
                int bit = 0;

                foreach (string sysctlName in _sysctlNames)
                {
                    if (CheckSysctlName(sysctlName))
                    {
                        MacOsFeatureInfo |= (MacOsFeatureFlags)(1 << bit);
                    }

                    bit++;
                }
            }
        }

#region Linux

        private const ulong AT_HWCAP = 16;
        private const ulong AT_HWCAP2 = 26;

        [LibraryImport("libc", SetLastError = true)]
        private static partial ulong getauxval(ulong type);

        /// <summary>Bits of the value returned by <c>getauxval(AT_HWCAP)</c>.</summary>
        // NOTE(review): bit positions presumably mirror the kernel's arm64 HWCAP_* definitions - verify against the kernel headers.
        [Flags]
        public enum LinuxFeatureFlagsHwCap : ulong
        {
            Fp = 1UL << 0,
            Asimd = 1UL << 1,
            Evtstrm = 1UL << 2,
            Aes = 1UL << 3,
            Pmull = 1UL << 4,
            Sha1 = 1UL << 5,
            Sha2 = 1UL << 6,
            Crc32 = 1UL << 7,
            Atomics = 1UL << 8,
            FpHp = 1UL << 9,
            AsimdHp = 1UL << 10,
            CpuId = 1UL << 11,
            AsimdRdm = 1UL << 12,
            Jscvt = 1UL << 13,
            Fcma = 1UL << 14,
            Lrcpc = 1UL << 15,
            DcpOp = 1UL << 16,
            Sha3 = 1UL << 17,
            Sm3 = 1UL << 18,
            Sm4 = 1UL << 19,
            AsimdDp = 1UL << 20,
            Sha512 = 1UL << 21,
            Sve = 1UL << 22,
            AsimdFhm = 1UL << 23,
            Dit = 1UL << 24,
            Uscat = 1UL << 25,
            Ilrcpc = 1UL << 26,
            FlagM = 1UL << 27,
            Ssbs = 1UL << 28,
            Sb = 1UL << 29,
            Paca = 1UL << 30,
            Pacg = 1UL << 31
        }

        /// <summary>Bits of the value returned by <c>getauxval(AT_HWCAP2)</c>.</summary>
        // NOTE(review): bit positions presumably mirror the kernel's arm64 HWCAP2_* definitions - verify against the kernel headers.
        [Flags]
        public enum LinuxFeatureFlagsHwCap2 : ulong
        {
            Dcpodp = 1UL << 0,
            Sve2 = 1UL << 1,
            SveAes = 1UL << 2,
            SvePmull = 1UL << 3,
            SveBitperm = 1UL << 4,
            SveSha3 = 1UL << 5,
            SveSm4 = 1UL << 6,
            FlagM2 = 1UL << 7,
            Frint = 1UL << 8,
            SveI8mm = 1UL << 9,
            SveF32mm = 1UL << 10,
            SveF64mm = 1UL << 11,
            SveBf16 = 1UL << 12,
            I8mm = 1UL << 13,
            Bf16 = 1UL << 14,
            Dgh = 1UL << 15,
            Rng = 1UL << 16,
            Bti = 1UL << 17,
            Mte = 1UL << 18,
            Ecv = 1UL << 19,
            Afp = 1UL << 20,
            Rpres = 1UL << 21,
            Mte3 = 1UL << 22,
            Sme = 1UL << 23,
            Sme_i16i64 = 1UL << 24,
            Sme_f64f64 = 1UL << 25,
            Sme_i8i32 = 1UL << 26,
            Sme_f16f32 = 1UL << 27,
            Sme_b16f32 = 1UL << 28,
            Sme_f32f32 = 1UL << 29,
            Sme_fa64 = 1UL << 30,
            Wfxt = 1UL << 31,
            Ebf16 = 1UL << 32,
            Sve_Ebf16 = 1UL << 33,
            Cssc = 1UL << 34,
            Rprfm = 1UL << 35,
            Sve2p1 = 1UL << 36
        }

        /// <summary>Flags read from <c>AT_HWCAP</c>; zero unless running on Arm64 Linux.</summary>
        public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0;

        /// <summary>Flags read from <c>AT_HWCAP2</c>; zero unless running on Arm64 Linux.</summary>
        public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0;

#endregion

#region macOS

        [LibraryImport("libSystem.dylib", SetLastError = true)]
        private static unsafe partial int sysctlbyname(
            [MarshalAs(UnmanagedType.LPStr)] string name,
            out int oldValue,
            ref ulong oldSize,
            IntPtr newValue,
            ulong newValueSize);

        /// <summary>
        /// Reads an integer sysctl by name and reports whether it is non-zero.
        /// Returns <c>false</c> when the call fails or the reported size is not that of an <c>int</c>.
        /// </summary>
        [SupportedOSPlatform("macos")]
        private static bool CheckSysctlName(string name)
        {
            ulong size = sizeof(int);

            int status = sysctlbyname(name, out int val, ref size, IntPtr.Zero, 0);

            if (status != 0 || size != sizeof(int))
            {
                return false;
            }

            return val != 0;
        }

        // Order matters: index i corresponds to bit i of MacOsFeatureFlags.
        private static string[] _sysctlNames =
        {
            "hw.optional.floatingpoint",
            "hw.optional.AdvSIMD",
            "hw.optional.arm.FEAT_FP16",
            "hw.optional.arm.FEAT_AES",
            "hw.optional.arm.FEAT_PMULL",
            "hw.optional.arm.FEAT_LSE",
            "hw.optional.armv8_crc32",
            "hw.optional.arm.FEAT_SHA1",
            "hw.optional.arm.FEAT_SHA256"
        };

        /// <summary>One bit per entry of the sysctl name table, in the same order.</summary>
        [Flags]
        public enum MacOsFeatureFlags
        {
            Fp = 0x001,
            AdvSimd = 0x002,
            Fp16 = 0x004,
            Aes = 0x008,
            Pmull = 0x010,
            Lse = 0x020,
            Crc32 = 0x040,
            Sha1 = 0x080,
            Sha256 = 0x100
        }

        /// <summary>Flags gathered from the sysctl probes; zero unless running on Arm64 macOS.</summary>
        public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0;

#endregion

        // Each property is true when either platform's flag set reports the feature.
        public static bool SupportsAdvSimd =>
            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Asimd) != 0 ||
            (MacOsFeatureInfo & MacOsFeatureFlags.AdvSimd) != 0;

        public static bool SupportsAes =>
            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Aes) != 0 ||
            (MacOsFeatureInfo & MacOsFeatureFlags.Aes) != 0;

        public static bool SupportsPmull =>
            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Pmull) != 0 ||
            (MacOsFeatureInfo & MacOsFeatureFlags.Pmull) != 0;

        public static bool SupportsLse =>
            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Atomics) != 0 ||
            (MacOsFeatureInfo & MacOsFeatureFlags.Lse) != 0;

        public static bool SupportsCrc32 =>
            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Crc32) != 0 ||
            (MacOsFeatureInfo & MacOsFeatureFlags.Crc32) != 0;

        public static bool SupportsSha1 =>
            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Sha1) != 0 ||
            (MacOsFeatureInfo & MacOsFeatureFlags.Sha1) != 0;

        public static bool SupportsSha256 =>
            (LinuxFeatureInfoHwCap & LinuxFeatureFlagsHwCap.Sha2) != 0 ||
            (MacOsFeatureInfo & MacOsFeatureFlags.Sha256) != 0;
    }
}

// File: src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Pairs a base instruction encoding with the <see cref="IntrinsicType"/> that tells the
    /// code generator how to lay out its operands.
    /// </summary>
    struct IntrinsicInfo
    {
        /// <summary>Base opcode bits before operand and size fields are merged in.</summary>
        public uint Inst { get; }

        /// <summary>Operand layout category of the intrinsic.</summary>
        public IntrinsicType Type { get; }

        /// <summary>Creates an entry from its opcode and layout category.</summary>
        /// <param name="inst">Base instruction encoding.</param>
        /// <param name="type">Operand layout category.</param>
        public IntrinsicInfo(uint inst, IntrinsicType type)
        {
            this.Inst = inst;
            this.Type = type;
        }
    }
}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs new file mode 100644 index 00000000..a309d56d --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs @@ -0,0 +1,463 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class IntrinsicTable + { + private static IntrinsicInfo[] _intrinTable; + + static IntrinsicTable() + { + _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))]; + + Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm)); + Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, 
IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, 
IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem)); + Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt)); + Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, 
IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond)); + Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond)); + Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare)); + Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare)); + Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel)); + Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, 
IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtpsGp, new 
IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv)); + Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmaddS, new 
IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlaVe, new 
IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi)); + Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi)); + Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem)); + Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem)); + Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem)); + Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem)); + Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, 
IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, 
IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem)); + Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem)); + Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, 
IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi)); + Add(Intrinsic.Arm64MrsFpcr, new IntrinsicInfo(0xd53b4400u, IntrinsicType.GetRegister)); + Add(Intrinsic.Arm64MsrFpcr, new IntrinsicInfo(0xd51b4400u, IntrinsicType.SetRegister)); + Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister)); + Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister)); + Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni)); + Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise)); + Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm)); + Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise)); + Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Rev32V, new 
IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, 
IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd)); + Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd)); + Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SmlalVe, 
new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmulhS, new 
IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64SqshlVi, new 
IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, 
IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, 
IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable)); + Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable)); + Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmaxpV, 
new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, 
IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64UsqaddS, new 
IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd));
            Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd));
            Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
            Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
            Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
            Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
            Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
            Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
            Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
            Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
            Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
        }

        /// <summary>
        /// Stores <paramref name="info"/> in the table slot whose index is the integer value of
        /// <paramref name="intrin"/>. An existing entry in that slot is overwritten.
        /// </summary>
        /// <param name="intrin">Intrinsic whose integer value selects the slot.</param>
        /// <param name="info">Entry to store for that intrinsic.</param>
        private static void Add(Intrinsic intrin, IntrinsicInfo info) => _intrinTable[(int)intrin] = info;

        /// <summary>
        /// Reads the table slot whose index is the integer value of <paramref name="intrin"/>.
        /// </summary>
        /// <param name="intrin">Intrinsic to look up.</param>
        /// <returns>The entry currently stored in that slot.</returns>
        public static IntrinsicInfo GetInfo(Intrinsic intrin) => _intrinTable[(int)intrin];
    }
}
// File: src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// Category stored alongside the base encoding in every <c>IntrinsicInfo</c> entry of the
    /// Arm64 intrinsic table.
    /// </summary>
    /// <remarks>
    /// Members carry no explicit values, so their numeric values follow declaration order;
    /// inserting or reordering members changes those values.
    /// NOTE(review): presumably the code generator switches on this value to decide how the
    /// operands of an intrinsic are encoded - confirm against the code generator.
    /// </remarks>
    enum IntrinsicType
    {
        // Scalar* categories.
        ScalarUnary,
        ScalarUnaryByElem,
        ScalarBinary,
        ScalarBinaryByElem,
        ScalarBinaryFPByElem,
        ScalarBinaryRd,
        ScalarBinaryShl,
        ScalarBinaryShr,
        ScalarFcsel,
        ScalarFmovi,
        ScalarFPCompare,
        ScalarFPCompareCond,
        ScalarFPConv,
        ScalarFPConvFixed,
        ScalarFPConvFixedGpr,
        ScalarFPConvGpr,
        ScalarTernary,
        ScalarTernaryFPRdByElem,
        ScalarTernaryShlRd,
        ScalarTernaryShrRd,

        // Vector* categories.
        VectorUnary,
        VectorUnaryBitwise,
        VectorUnaryByElem,
        VectorBinary,
        VectorBinaryBitwise,
        VectorBinaryBitwiseImm,
        VectorBinaryByElem,
        VectorBinaryFPByElem,
        VectorBinaryRd,
        VectorBinaryShl,
        VectorBinaryShr,
        VectorExt,
        VectorFmovi,
        VectorFPConvFixed,
        VectorInsertByElem,
        VectorLdSt,
        VectorLdStSs,
        VectorLookupTable,
        VectorMovi,
        VectorMvni,
        VectorTernaryFPRdByElem,
        VectorTernaryRd,
        VectorTernaryRdBitwise,
        VectorTernaryRdByElem,
        VectorTernaryShlRd,
        VectorTernaryShrRd,

        // Vector128* categories (used by the SHA1/SHA256 table entries).
        Vector128Unary,
        Vector128Binary,

        // Used by the MRS/MSR FPCR and FPSR table entries.
        GetRegister,
        SetRegister
    }
}
// File: src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

namespace ARMeilleure.CodeGen.Arm64
{
    /// <summary>
    /// IR pass that walks every operation of every basic block and inserts the extra copies
    /// needed for constant operands, for instructions whose destination must equal their first
    /// source, and for calls, returns, tail calls and argument loads.
    /// </summary>
    static class PreAllocator
    {
        /// <summary>
        /// Lookup from a (constant value, operand type) pair to the local operand associated with it.
        /// </summary>
        private class ConstantDict
        {
            private readonly Dictionary<(ulong, OperandType), Operand> _constants =
                new Dictionary<(ulong, OperandType), Operand>();

            /// <summary>Registers <paramref name="local"/> for the given value/type pair.</summary>
            public void Add(ulong value, OperandType type, Operand local) => _constants.Add((value, type), local);

            /// <summary>Looks up the local registered for the given value/type pair, if any.</summary>
            public bool TryGetValue(ulong value, OperandType type, out Operand local) =>
                _constants.TryGetValue((value, type), out local);
        }

        /// <summary>
        /// Runs the pass over the whole control flow graph of <paramref name="cctx"/>.
        /// </summary>
        /// <param name="cctx">Compiler context whose CFG is rewritten in place.</param>
        /// <param name="stackAlloc">Stack allocator forwarded to the tail call handling.</param>
        /// <param name="maxCallArgs">
        /// Receives the largest argument count seen on any call, or -1 when there is no call.
        /// </param>
        public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
        {
            maxCallArgs = -1;

            Span<Operation> buffer = default;
            Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];

            BasicBlock block = cctx.Cfg.Blocks.First;

            while (block != null)
            {
                // The constant cache is rebuilt for each block.
                ConstantDict constants = new ConstantDict();

                Operation node = block.Operations.First;

                while (node != default)
                {
                    // Captured up front because the handlers below insert operations around the
                    // node; some of them also return the operation to continue from.
                    Operation next = node.ListNext;

                    if (node.Instruction != Instruction.Phi)
                    {
                        InsertConstantRegCopies(constants, block.Operations, node);
                        InsertDestructiveRegCopies(block.Operations, node);

                        switch (node.Instruction)
                        {
                            case Instruction.Call:
                            {
                                // Source 0 is not counted as an argument. A call with a V128
                                // destination needs one more slot, for the address the result
                                // is written to.
                                Operand callDest = node.Destination;
                                int argsCount = node.SourcesCount - 1;

                                if (callDest != default && callDest.Type == OperandType.V128)
                                {
                                    argsCount++;
                                }

                                maxCallArgs = Math.Max(maxCallArgs, argsCount);

                                // Move the arguments to the locations the ABI expects.
                                InsertCallCopies(constants, block.Operations, node);
                                break;
                            }

                            case Instruction.CompareAndSwap:
                            case Instruction.CompareAndSwap16:
                            case Instruction.CompareAndSwap8:
                                next = GenerateCompareAndSwap(block.Operations, node);
                                break;

                            case Instruction.LoadArgument:
                                next = InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
                                break;

                            case Instruction.Return:
                                InsertReturnCopy(block.Operations, node);
                                break;

                            case Instruction.Tailcall:
                                InsertTailcallCopies(constants, block.Operations, stackAlloc, node, node);
                                break;
                        }
                    }

                    node = next;
                }

                block = block.ListNext;
            }
        }

        /// <summary>
        /// Replaces constant sources of <paramref name="node"/> that cannot stay as constants
        /// with locals produced by copies inserted through the constant-copy helpers.
        /// </summary>
        /// <param name="constants">Constant cache of the current block.</param>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="node">Operation whose sources are examined.</param>
        private static void InsertConstantRegCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
        {
            if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
            {
                return;
            }

            Instruction inst = node.Instruction;

            Operand first = node.GetSource(0);

            if (first.Kind == OperandKind.Constant)
            {
                if (!first.Type.IsInteger())
                {
                    // Non-integer constants (FP32, FP64 and V128) are never kept as a constant
                    // operand here: the value goes through a GPR copy and then a copy to a
                    // vector register, and the operation reads that register instead.
                    node.SetSource(0, AddFloatConstantCopy(constants, nodes, node, first));
                }
                else if (!HasConstSrc1(node, first.Value))
                {
                    // Integer constant that cannot stay on operand 1. For a commutative
                    // operation, swap the operands first: the constant may then be encodable as
                    // operand 2 (checked further down), which saves a copy.
                    if (IsCommutative(node))
                    {
                        Operand swapped = node.GetSource(1);

                        node.SetSource(0, swapped);
                        node.SetSource(1, first);

                        first = swapped;
                    }

                    // Still a constant on operand 1 (no swap, or both operands constant).
                    if (first.Kind == OperandKind.Constant)
                    {
                        node.SetSource(0, AddIntConstantCopy(constants, nodes, node, first));
                    }
                }
            }

            if (node.SourcesCount < 2)
            {
                return;
            }

            Operand second = node.GetSource(1);

            if (second.Kind == OperandKind.Constant)
            {
                if (!second.Type.IsInteger())
                {
                    node.SetSource(1, AddFloatConstantCopy(constants, nodes, node, second));
                }
                else if (!HasConstSrc2(inst, second))
                {
                    // The instruction cannot take this constant on operand 2.
                    node.SetSource(1, AddIntConstantCopy(constants, nodes, node, second));
                }
            }

            if (node.SourcesCount < 3)
            {
                return;
            }

            // For these instructions the sources from index 2 onwards are left untouched.
            switch (node.Instruction)
            {
                case Instruction.BranchIf:
                case Instruction.Compare:
                case Instruction.VectorInsert:
                case Instruction.VectorInsert16:
                case Instruction.VectorInsert8:
                    return;
            }

            // Every remaining constant source is replaced unconditionally.
            for (int i = 2; i < node.SourcesCount; i++)
            {
                Operand operand = node.GetSource(i);

                if (operand.Kind != OperandKind.Constant)
                {
                    continue;
                }

                Operand replacement = operand.Type.IsInteger()
                    ? AddIntConstantCopy(constants, nodes, node, operand)
                    : AddFloatConstantCopy(constants, nodes, node, operand);

                node.SetSource(i, replacement);
            }
        }

        /// <summary>
        /// For operations where destination and first source must be the same operand, inserts
        /// the copies that make the first source equal to the destination.
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="node">Operation to rewrite.</param>
        private static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node)
        {
            if (node.Destination == default || node.SourcesCount == 0)
            {
                return;
            }

            Operand dest = node.Destination;
            Operand src1 = node.GetSource(0);

            if (!IsSameOperandDestSrc1(node) || src1.Kind != OperandKind.LocalVariable)
            {
                return;
            }

            // Is the destination also read through any source other than the first?
            bool destReadAsSource = false;

            for (int i = 1; i < node.SourcesCount && !destReadAsSource; i++)
            {
                destReadAsSource = node.GetSource(i) == dest;
            }

            if (!destReadAsSource)
            {
                // Simple case: load the first source into the destination, then operate on it.
                nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));

                node.SetSource(0, dest);

                return;
            }

            // Copying into the destination up front would clobber the value another source
            // still needs, so the operation runs on a fresh local and the result is copied to
            // the destination afterwards.
            Operand temp = Local(dest.Type);

            nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));

            node.SetSource(0, temp);

            nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));

            node.Destination = temp;
        }

        private static void InsertCallCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
        {
            Operation operation = node;

            Operand dest = operation.Destination;

            List<Operand> sources = new List<Operand>
            {
                operation.GetSource(0)
            };

            int argsCount = operation.SourcesCount - 1;

            int intMax = CallingConvention.GetArgumentsOnRegsCount();
            int vecMax = CallingConvention.GetArgumentsOnRegsCount();

            int intCount = 0;
            int vecCount = 0;

            int stackOffset = 0;

            for (int index = 0; index < argsCount; index++)
            {
                Operand source = operation.GetSource(index + 1);

                bool passOnReg;

                if (source.Type.IsInteger())
                {
                    passOnReg = intCount < intMax;
                }
                else if (source.Type == OperandType.V128)
                {
                    passOnReg = intCount + 1 < intMax;
                }
                else
                {
                    passOnReg = vecCount < vecMax;
                }

                if (source.Type == OperandType.V128 && passOnReg)
                {
                    // V128 is a struct, we pass each half on a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp)); + + sources.Add(argReg); + } + else + { + Operand offset = Const(stackOffset); + + Operation spillOp = Operation(Instruction.SpillArg, default, offset, source); + + InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, spillOp)); + + stackOffset += source.Type.GetSizeInBytes(); + } + } + + if (dest != default) + { + if (dest.Type == OperandType.V128) + { + Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64); + + node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg)); + nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1))); + + operation.Destination = default; + } + else + { + Operand retReg = dest.Type.IsInteger() + ? 
Gpr(CallingConvention.GetIntReturnRegister(), dest.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, dest, retReg); + + nodes.AddAfter(node, copyOp); + + operation.Destination = retReg; + } + } + + operation.SetSources(sources.ToArray()); + } + + private static void InsertTailcallCopies( + ConstantDict constants, + IntrusiveList<Operation> nodes, + StackAllocator stackAlloc, + Operation node, + Operation operation) + { + List<Operand> sources = new List<Operand> + { + operation.GetSource(0) + }; + + int argsCount = operation.SourcesCount - 1; + + int intMax = CallingConvention.GetArgumentsOnRegsCount(); + int vecMax = CallingConvention.GetArgumentsOnRegsCount(); + + int intCount = 0; + int vecCount = 0; + + // Handle arguments passed on registers. + for (int index = 0; index < argsCount; index++) + { + Operand source = operation.GetSource(1 + index); + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount + 1 < intMax; + } + else + { + passOnReg = vecCount < vecMax; + } + + if (source.Type == OperandType.V128 && passOnReg) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? 
Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp)); + + sources.Add(argReg); + } + else + { + throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)"); + } + } + + // The target address must be on the return registers, since we + // don't return anything and it is guaranteed to not be a + // callee saved register (which would be trashed on the epilogue). + Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64); + + Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0)); + + nodes.AddBefore(node, addrCopyOp); + + sources[0] = tcAddress; + + operation.SetSources(sources.ToArray()); + } + + private static Operation GenerateCompareAndSwap(IntrusiveList<Operation> nodes, Operation node) + { + Operand expected = node.GetSource(1); + + if (expected.Type == OperandType.V128) + { + Operand dest = node.Destination; + Operand expectedLow = Local(OperandType.I64); + Operand expectedHigh = Local(OperandType.I64); + Operand desiredLow = Local(OperandType.I64); + Operand desiredHigh = Local(OperandType.I64); + Operand actualLow = Local(OperandType.I64); + Operand actualHigh = Local(OperandType.I64); + + Operand address = node.GetSource(0); + Operand desired = node.GetSource(2); + + void SplitOperand(Operand source, Operand low, Operand high) + { + nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1))); + } + + SplitOperand(expected, expectedLow, expectedHigh); + SplitOperand(desired, desiredLow, desiredHigh); + + Operation operation = node; + + // Update the sources and destinations with split 64-bit halfs of the whole 128-bit 
values. + // We also need a additional registers that will be used to store temporary information. + operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) }); + operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh }); + + // Add some dummy uses of the input operands, as the CAS operation will be a loop, + // so they can't be used as destination operand. + for (int i = 0; i < operation.SourcesCount; i++) + { + Operand src = operation.GetSource(i); + node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src)); + } + + // Assemble the vector with the 64-bit values at the given memory location. + node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow)); + node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1))); + } + else + { + // We need a additional register where the store result will be written to. + node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) }); + + // Add some dummy uses of the input operands, as the CAS operation will be a loop, + // so they can't be used as destination operand. 
+ Operation operation = node; + + for (int i = 0; i < operation.SourcesCount; i++) + { + Operand src = operation.GetSource(i); + node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src)); + } + } + + return node.ListNext; + } + + private static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node) + { + if (node.SourcesCount == 0) + { + return; + } + + Operand source = node.GetSource(0); + + if (source.Type == OperandType.V128) + { + Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1))); + } + else + { + Operand retReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), source.Type); + + Operation retCopyOp = Operation(Instruction.Copy, retReg, source); + + nodes.AddBefore(node, retCopyOp); + } + } + + private static Operation InsertLoadArgumentCopy( + CompilerContext cctx, + ref Span<Operation> buffer, + IntrusiveList<Operation> nodes, + Operand[] preservedArgs, + Operation node) + { + Operand source = node.GetSource(0); + + Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind."); + + int index = source.AsInt32(); + + int intCount = 0; + int vecCount = 0; + + for (int cIndex = 0; cIndex < index; cIndex++) + { + OperandType argType = cctx.FuncArgTypes[cIndex]; + + if (argType.IsInteger()) + { + intCount++; + } + else if (argType == OperandType.V128) + { + intCount += 2; + } + else + { + vecCount++; + } + } + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount(); + } + else if (source.Type == OperandType.V128) + { + passOnReg = intCount + 1 < 
CallingConvention.GetArgumentsOnRegsCount(); + } + else + { + passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount(); + } + + if (passOnReg) + { + Operand dest = node.Destination; + + if (preservedArgs[index] == default) + { + if (dest.Type == OperandType.V128) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand pArg = Local(OperandType.V128); + + Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64); + Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64); + + Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg); + Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1)); + + cctx.Cfg.Entry.Operations.AddFirst(copyH); + cctx.Cfg.Entry.Operations.AddFirst(copyL); + + preservedArgs[index] = pArg; + } + else + { + Operand pArg = Local(dest.Type); + + Operand argReg = dest.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, pArg, argReg); + + cctx.Cfg.Entry.Operations.AddFirst(copyOp); + + preservedArgs[index] = pArg; + } + } + + Operation nextNode; + + if (dest.AssignmentsCount == 1) + { + // Let's propagate the argument if we can to avoid copies. + PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]); + nextNode = node.ListNext; + } + else + { + Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]); + nextNode = nodes.AddBefore(node, argCopyOp); + } + + Delete(nodes, node); + return nextNode; + } + else + { + // TODO: Pass on stack. 
+ return node; + } + } + + private static Operand AddFloatConstantCopy( + ConstantDict constants, + IntrusiveList<Operation> nodes, + Operation node, + Operand source) + { + Operand temp = Local(source.Type); + + Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source)); + + Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst); + + nodes.AddBefore(node, copyOp); + + return temp; + } + + private static Operand AddIntConstantCopy( + ConstantDict constants, + IntrusiveList<Operation> nodes, + Operation node, + Operand source) + { + if (constants.TryGetValue(source.Value, source.Type, out Operand temp)) + { + return temp; + } + + temp = Local(source.Type); + + Operation copyOp = Operation(Instruction.Copy, temp, source); + + nodes.AddBefore(node, copyOp); + + constants.Add(source.Value, source.Type, temp); + + return temp; + } + + private static Operand GetIntConst(Operand value) + { + if (value.Type == OperandType.FP32) + { + return Const(value.AsInt32()); + } + else if (value.Type == OperandType.FP64) + { + return Const(value.AsInt64()); + } + + return value; + } + + private static void Delete(IntrusiveList<Operation> nodes, Operation node) + { + node.Destination = default; + + for (int index = 0; index < node.SourcesCount; index++) + { + node.SetSource(index, default); + } + + nodes.Remove(node); + } + + private static Operand Gpr(int register, OperandType type) + { + return Register(register, RegisterType.Integer, type); + } + + private static Operand Xmm(int register, OperandType type) + { + return Register(register, RegisterType.Vector, type); + } + + private static bool IsSameOperandDestSrc1(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Extended: + return IsSameOperandDestSrc1(operation.Intrinsic); + case Instruction.VectorInsert: + case Instruction.VectorInsert16: + case Instruction.VectorInsert8: + return true; + } + + return false; + } + + private static bool 
IsSameOperandDestSrc1(Intrinsic intrinsic) + { + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + return info.Type == IntrinsicType.ScalarBinaryRd || + info.Type == IntrinsicType.ScalarTernaryFPRdByElem || + info.Type == IntrinsicType.ScalarTernaryShlRd || + info.Type == IntrinsicType.ScalarTernaryShrRd || + info.Type == IntrinsicType.VectorBinaryRd || + info.Type == IntrinsicType.VectorInsertByElem || + info.Type == IntrinsicType.VectorTernaryRd || + info.Type == IntrinsicType.VectorTernaryRdBitwise || + info.Type == IntrinsicType.VectorTernaryFPRdByElem || + info.Type == IntrinsicType.VectorTernaryRdByElem || + info.Type == IntrinsicType.VectorTernaryShlRd || + info.Type == IntrinsicType.VectorTernaryShrRd; + } + + private static bool HasConstSrc1(Operation node, ulong value) + { + switch (node.Instruction) + { + case Instruction.Add: + case Instruction.BranchIf: + case Instruction.Compare: + case Instruction.Subtract: + // The immediate encoding of those instructions does not allow Rn to be + // XZR (it will be SP instead), so we can't allow a Rn constant in this case. 
+ return value == 0 && NotConstOrConst0(node.GetSource(1)); + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseNot: + case Instruction.BitwiseOr: + case Instruction.ByteSwap: + case Instruction.CountLeadingZeros: + case Instruction.Multiply: + case Instruction.Negate: + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + return value == 0; + case Instruction.Copy: + case Instruction.LoadArgument: + case Instruction.Spill: + case Instruction.SpillArg: + return true; + case Instruction.Extended: + return value == 0; + } + + return false; + } + + private static bool NotConstOrConst0(Operand operand) + { + return operand.Kind != OperandKind.Constant || operand.Value == 0; + } + + private static bool HasConstSrc2(Instruction inst, Operand operand) + { + ulong value = operand.Value; + + switch (inst) + { + case Instruction.Add: + case Instruction.BranchIf: + case Instruction.Compare: + case Instruction.Subtract: + return ConstFitsOnUImm12Sh(value); + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _); + case Instruction.Multiply: + case Instruction.Store: + case Instruction.Store16: + case Instruction.Store8: + return value == 0; + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + case Instruction.VectorExtract: + case Instruction.VectorExtract16: + case Instruction.VectorExtract8: + return true; + case Instruction.Extended: + // TODO: Check if actual intrinsic is supposed to have consts here? + // Right now we only hit this case for fixed-point int <-> FP conversion instructions. 
+ return true; + } + + return false; + } + + private static bool IsCommutative(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Add: + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + case Instruction.Multiply: + return true; + + case Instruction.BranchIf: + case Instruction.Compare: + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var compType = (Comparison)comp.AsInt32(); + + return compType == Comparison.Equal || compType == Comparison.NotEqual; + } + } + + return false; + } + + private static bool ConstFitsOnUImm12Sh(ulong value) + { + return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0; + } + + private static bool IsIntrinsicWithConst(Operation operation) + { + bool isIntrinsic = IsIntrinsic(operation.Instruction); + + if (isIntrinsic) + { + Intrinsic intrinsic = operation.Intrinsic; + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + // Those have integer inputs that don't support consts. + return info.Type != IntrinsicType.ScalarFPConvGpr && + info.Type != IntrinsicType.ScalarFPConvFixedGpr && + info.Type != IntrinsicType.SetRegister; + } + + return false; + } + + private static bool IsIntrinsic(Instruction inst) + { + return inst == Instruction.Extended; + } + } +} diff --git a/src/ARMeilleure/CodeGen/CompiledFunction.cs b/src/ARMeilleure/CodeGen/CompiledFunction.cs new file mode 100644 index 00000000..0560bf2e --- /dev/null +++ b/src/ARMeilleure/CodeGen/CompiledFunction.cs @@ -0,0 +1,68 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Translation.Cache; +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.CodeGen +{ + /// <summary> + /// Represents a compiled function. 
+ /// </summary> + readonly struct CompiledFunction + { + /// <summary> + /// Gets the machine code of the <see cref="CompiledFunction"/>. + /// </summary> + public byte[] Code { get; } + + /// <summary> + /// Gets the <see cref="Unwinding.UnwindInfo"/> of the <see cref="CompiledFunction"/>. + /// </summary> + public UnwindInfo UnwindInfo { get; } + + /// <summary> + /// Gets the <see cref="Linking.RelocInfo"/> of the <see cref="CompiledFunction"/>. + /// </summary> + public RelocInfo RelocInfo { get; } + + /// <summary> + /// Initializes a new instance of the <see cref="CompiledFunction"/> struct with the specified machine code, + /// unwind info and relocation info. + /// </summary> + /// <param name="code">Machine code</param> + /// <param name="unwindInfo">Unwind info</param> + /// <param name="relocInfo">Relocation info</param> + internal CompiledFunction(byte[] code, UnwindInfo unwindInfo, RelocInfo relocInfo) + { + Code = code; + UnwindInfo = unwindInfo; + RelocInfo = relocInfo; + } + + /// <summary> + /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type + /// <typeparamref name="T"/> pointing to the mapped function. + /// </summary> + /// <typeparam name="T">Type of delegate</typeparam> + /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns> + public T Map<T>() + { + return MapWithPointer<T>(out _); + } + + /// <summary> + /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type + /// <typeparamref name="T"/> pointing to the mapped function. 
+ /// </summary> + /// <typeparam name="T">Type of delegate</typeparam> + /// <param name="codePointer">Pointer to the function code in memory</param> + /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns> + public T MapWithPointer<T>(out IntPtr codePointer) + { + codePointer = JitCache.Map(this); + + return Marshal.GetDelegateForFunctionPointer<T>(codePointer); + } + } +}
// File: src/ARMeilleure/CodeGen/Linking/RelocEntry.cs
namespace ARMeilleure.CodeGen.Linking
{
    /// <summary>
    /// Describes one relocation: a position paired with the <see cref="Symbol"/> it refers to.
    /// </summary>
    readonly struct RelocEntry
    {
        /// <summary>
        /// Stride of an entry, in bytes.
        /// </summary>
        // NOTE(review): 13 presumably corresponds to a 4-byte position, a 1-byte symbol type and an
        // 8-byte symbol value when entries are serialized; confirm against the code that reads/writes them.
        public const int Stride = 13;

        /// <summary>
        /// Gets the position of the relocation.
        /// </summary>
        public int Position { get; }

        /// <summary>
        /// Gets the <see cref="Symbol"/> of the relocation.
        /// </summary>
        public Symbol Symbol { get; }

        /// <summary>
        /// Creates a <see cref="RelocEntry"/> from a position and the <see cref="Symbol"/> found there.
        /// </summary>
        /// <param name="position">Position of relocation</param>
        /// <param name="symbol">Symbol of relocation</param>
        public RelocEntry(int position, Symbol symbol)
        {
            (Position, Symbol) = (position, symbol);
        }

        /// <inheritdoc/>
        public override string ToString() => $"({nameof(Position)} = {Position}, {nameof(Symbol)} = {Symbol})";
    }
}
// File: src/ARMeilleure/CodeGen/Linking/RelocInfo.cs
using System;

namespace ARMeilleure.CodeGen.Linking
{
    /// <summary>
    /// Represents relocation information about a <see cref="CompiledFunction"/>.
    /// </summary>
    readonly struct RelocInfo
    {
        /// <summary>
        /// Gets an empty <see cref="RelocInfo"/>.
        /// </summary>
        // Backed by a null array on purpose: Entries converts it to an empty span (see below).
        public static RelocInfo Empty { get; } = new RelocInfo(null);

        private readonly RelocEntry[] _entries;

        /// <summary>
        /// Gets the set of <see cref="RelocEntry"/>.
        /// </summary>
        /// <remarks>
        /// The implicit array-to-span conversion maps a <see langword="null"/> array to an empty span,
        /// so this is safe to enumerate for <see cref="Empty"/> and for <see langword="default"/> instances.
        /// </remarks>
        public ReadOnlySpan<RelocEntry> Entries => _entries;

        /// <summary>
        /// Initializes a new instance of the <see cref="RelocInfo"/> struct with the specified set of
        /// <see cref="RelocEntry"/>.
        /// </summary>
        /// <param name="entries">Set of <see cref="RelocEntry"/> to use; may be <see langword="null"/> for no entries</param>
        public RelocInfo(RelocEntry[] entries)
        {
            _entries = entries;
        }
    }
}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Linking/Symbol.cs b/src/ARMeilleure/CodeGen/Linking/Symbol.cs new file mode 100644 index 00000000..39e0c3eb --- /dev/null +++ b/src/ARMeilleure/CodeGen/Linking/Symbol.cs @@ -0,0 +1,99 @@ +using System; + +namespace ARMeilleure.CodeGen.Linking +{ + /// <summary> + /// Represents a symbol. + /// </summary> + readonly struct Symbol + { + private readonly ulong _value; + + /// <summary> + /// Gets the <see cref="SymbolType"/> of the <see cref="Symbol"/>. + /// </summary> + public SymbolType Type { get; } + + /// <summary> + /// Gets the value of the <see cref="Symbol"/>. + /// </summary> + /// <exception cref="InvalidOperationException"><see cref="Type"/> is <see cref="SymbolType.None"/></exception> + public ulong Value + { + get + { + if (Type == SymbolType.None) + { + ThrowSymbolNone(); + } + + return _value; + } + } + + /// <summary> + /// Initializes a new instance of the <see cref="Symbol"/> structure with the specified <see cref="SymbolType"/> and value. + /// </summary> + /// <param name="type">Type of symbol</param> + /// <param name="value">Value of symbol</param> + public Symbol(SymbolType type, ulong value) + { + (Type, _value) = (type, value); + } + + /// <summary> + /// Determines if the specified <see cref="Symbol"/> instances are equal. + /// </summary> + /// <param name="a">First instance</param> + /// <param name="b">Second instance</param> + /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns> + public static bool operator ==(Symbol a, Symbol b) + { + return a.Equals(b); + } + + /// <summary> + /// Determines if the specified <see cref="Symbol"/> instances are not equal. 
+ /// </summary> + /// <param name="a">First instance</param> + /// <param name="b">Second instance</param> + /// <returns><see langword="true"/> if not equal; otherwise <see langword="false"/></returns> + public static bool operator !=(Symbol a, Symbol b) + { + return !(a == b); + } + + /// <summary> + /// Determines if the specified <see cref="Symbol"/> is equal to this <see cref="Symbol"/> instance. + /// </summary> + /// <param name="other">Other <see cref="Symbol"/> instance</param> + /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns> + public bool Equals(Symbol other) + { + return other.Type == Type && other._value == _value; + } + + /// <inheritdoc/> + public override bool Equals(object obj) + { + return obj is Symbol sym && Equals(sym); + } + + /// <inheritdoc/> + public override int GetHashCode() + { + return HashCode.Combine(Type, _value); + } + + /// <inheritdoc/> + public override string ToString() + { + return $"{Type}:{_value}"; + } + + private static void ThrowSymbolNone() + { + throw new InvalidOperationException("Symbol refers to nothing."); + } + } +} diff --git a/src/ARMeilleure/CodeGen/Linking/SymbolType.cs b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs new file mode 100644 index 00000000..b05b6969 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs @@ -0,0 +1,28 @@ +namespace ARMeilleure.CodeGen.Linking +{ + /// <summary> + /// Types of <see cref="Symbol"/>. + /// </summary> + enum SymbolType : byte + { + /// <summary> + /// Refers to nothing, i.e no symbol. + /// </summary> + None, + + /// <summary> + /// Refers to an entry in <see cref="Translation.Delegates"/>. + /// </summary> + DelegateTable, + + /// <summary> + /// Refers to an entry in <see cref="Translation.Translator.FunctionTable"/>. + /// </summary> + FunctionTable, + + /// <summary> + /// Refers to a special symbol which is handled by <see cref="Translation.PTC.Ptc.PatchCode"/>. 
+ /// </summary> + Special + } +} diff --git a/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs new file mode 100644 index 00000000..9e243d37 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs @@ -0,0 +1,72 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Diagnostics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class BlockPlacement + { + public static void RunPass(ControlFlowGraph cfg) + { + bool update = false; + + BasicBlock block; + BasicBlock nextBlock; + + BasicBlock lastBlock = cfg.Blocks.Last; + + // Move cold blocks at the end of the list, so that they are emitted away from hot code. + for (block = cfg.Blocks.First; block != null; block = nextBlock) + { + nextBlock = block.ListNext; + + if (block.Frequency == BasicBlockFrequency.Cold) + { + cfg.Blocks.Remove(block); + cfg.Blocks.AddLast(block); + } + + if (block == lastBlock) + { + break; + } + } + + for (block = cfg.Blocks.First; block != null; block = nextBlock) + { + nextBlock = block.ListNext; + + if (block.SuccessorsCount == 2) + { + Operation branchOp = block.Operations.Last; + + Debug.Assert(branchOp.Instruction == Instruction.BranchIf); + + BasicBlock falseSucc = block.GetSuccessor(0); + BasicBlock trueSucc = block.GetSuccessor(1); + + // If true successor is next block in list, invert the condition. We avoid extra branching by + // making the true side the fallthrough (i.e, convert it to the false side). 
+ if (trueSucc == block.ListNext) + { + Comparison comp = (Comparison)branchOp.GetSource(2).AsInt32(); + Comparison compInv = comp.Invert(); + + branchOp.SetSource(2, Const((int)compInv)); + + block.SetSuccessor(0, trueSucc); + block.SetSuccessor(1, falseSucc); + + update = true; + } + } + } + + if (update) + { + cfg.Update(); + } + } + } +} diff --git a/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs new file mode 100644 index 00000000..c5a22a53 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs @@ -0,0 +1,346 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class ConstantFolding + { + public static void RunPass(Operation operation) + { + if (operation.Destination == default || operation.SourcesCount == 0) + { + return; + } + + if (!AreAllSourcesConstant(operation)) + { + return; + } + + OperandType type = operation.Destination.Type; + + switch (operation.Instruction) + { + case Instruction.Add: + if (operation.GetSource(0).Relocatable || + operation.GetSource(1).Relocatable) + { + break; + } + + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x + y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x + y); + } + break; + + case Instruction.BitwiseAnd: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x & y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x & y); + } + break; + + case Instruction.BitwiseExclusiveOr: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x ^ y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x ^ y); + } + break; + + case Instruction.BitwiseNot: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => 
~x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => ~x); + } + break; + + case Instruction.BitwiseOr: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x | y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x | y); + } + break; + + case Instruction.ConvertI64ToI32: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + break; + + case Instruction.Compare: + if (type == OperandType.I32 && + operation.GetSource(0).Type == type && + operation.GetSource(1).Type == type) + { + switch ((Comparison)operation.GetSource(2).Value) + { + case Comparison.Equal: + EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0); + break; + case Comparison.NotEqual: + EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0); + break; + case Comparison.Greater: + EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0); + break; + case Comparison.LessOrEqual: + EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0); + break; + case Comparison.GreaterUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0); + break; + case Comparison.LessOrEqualUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0); + break; + case Comparison.GreaterOrEqual: + EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0); + break; + case Comparison.Less: + EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0); + break; + case Comparison.GreaterOrEqualUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0); + break; + case Comparison.LessUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0); + break; + } + } + break; + + case Instruction.Copy: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => x); + } + break; + + case Instruction.Divide: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => y != 0 ? 
x / y : 0); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => y != 0 ? x / y : 0); + } + break; + + case Instruction.DivideUI: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => y != 0 ? (long)((ulong)x / (ulong)y) : 0); + } + break; + + case Instruction.Multiply: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x * y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x * y); + } + break; + + case Instruction.Negate: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => -x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => -x); + } + break; + + case Instruction.ShiftLeft: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x << y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x << (int)y); + } + break; + + case Instruction.ShiftRightSI: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x >> y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x >> (int)y); + } + break; + + case Instruction.ShiftRightUI: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => (int)((uint)x >> y)); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => (long)((ulong)x >> (int)y)); + } + break; + + case Instruction.SignExtend16: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (short)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (short)x); + } + break; + + case Instruction.SignExtend32: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (int)x); + } + 
break; + + case Instruction.SignExtend8: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (sbyte)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (sbyte)x); + } + break; + + case Instruction.ZeroExtend16: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (ushort)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (ushort)x); + } + break; + + case Instruction.ZeroExtend32: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (uint)x); + } + break; + + case Instruction.ZeroExtend8: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (byte)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (byte)x); + } + break; + + case Instruction.Subtract: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x - y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x - y); + } + break; + } + } + + private static bool AreAllSourcesConstant(Operation operation) + { + for (int index = 0; index < operation.SourcesCount; index++) + { + Operand srcOp = operation.GetSource(index); + + if (srcOp.Kind != OperandKind.Constant) + { + return false; + } + } + + return true; + } + + private static void EvaluateUnaryI32(Operation operation, Func<int, int> op) + { + int x = operation.GetSource(0).AsInt32(); + + operation.TurnIntoCopy(Const(op(x))); + } + + private static void EvaluateUnaryI64(Operation operation, Func<long, long> op) + { + long x = operation.GetSource(0).AsInt64(); + + operation.TurnIntoCopy(Const(op(x))); + } + + private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op) + { + int x = operation.GetSource(0).AsInt32(); + int y = operation.GetSource(1).AsInt32(); + + operation.TurnIntoCopy(Const(op(x, y))); + } + + private static void 
EvaluateBinaryI64(Operation operation, Func<long, long, long> op) + { + long x = operation.GetSource(0).AsInt64(); + long y = operation.GetSource(1).AsInt64(); + + operation.TurnIntoCopy(Const(op(x, y))); + } + } +}
// diff --git a/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs (new file)
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.CodeGen.Optimizations
{
    /// <summary>
    /// IR clean-up passes: unused node removal, constant folding, simplification,
    /// and propagation of compares and copies into their uses.
    /// </summary>
    static class Optimizer
    {
        /// <summary>
        /// Walks every block and every operation backwards, repeating until a full sweep changes nothing.
        /// </summary>
        public static void RunPass(ControlFlowGraph cfg)
        {
            // Scratch storage handed to GetUses by the propagation helpers.
            Span<Operation> scratch = default;

            bool changed;

            do
            {
                changed = false;

                BasicBlock blk = cfg.Blocks.Last;

                while (blk != null)
                {
                    Operation op = blk.Operations.Last;

                    while (op != default)
                    {
                        // Read before processing: `op` may be unlinked from the list below.
                        Operation before = op.ListPrevious;

                        if (IsUnused(op))
                        {
                            RemoveNode(blk, op);

                            changed = true;
                        }
                        else if (op.Instruction != Instruction.Phi)
                        {
                            ConstantFolding.RunPass(op);
                            Simplification.RunPass(op);

                            if (DestIsSingleLocalVar(op))
                            {
                                if (IsPropagableCompare(op))
                                {
                                    changed |= PropagateCompare(ref scratch, op);

                                    if (changed && IsUnused(op))
                                    {
                                        RemoveNode(blk, op);
                                    }
                                }
                                else if (IsPropagableCopy(op))
                                {
                                    PropagateCopy(ref scratch, op);

                                    RemoveNode(blk, op);

                                    changed = true;
                                }
                            }
                        }

                        op = before;
                    }

                    blk = blk.ListPrevious;
                }
            }
            while (changed);
        }

        /// <summary>
        /// Removes unused nodes only, sweeping backwards until a full sweep removes nothing.
        /// </summary>
        public static void RemoveUnusedNodes(ControlFlowGraph cfg)
        {
            bool removedAny;

            do
            {
                removedAny = false;

                BasicBlock blk = cfg.Blocks.Last;

                while (blk != null)
                {
                    Operation op = blk.Operations.Last;

                    while (op != default)
                    {
                        Operation before = op.ListPrevious;

                        if (IsUnused(op))
                        {
                            RemoveNode(blk, op);

                            removedAny = true;
                        }

                        op = before;
                    }

                    blk = blk.ListPrevious;
                }
            }
            while (removedAny);
        }

        /// <summary>
        /// Rewrites BranchIf uses of a Compare result so that they branch on the compared operands directly.
        /// </summary>
        /// <returns>True when at least one use was rewritten.</returns>
        private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp)
        {
            // Handles BranchIf uses of the Compare destination that test it against constant zero:
            //
            //  - BranchIf %x, 0x0, Equal     ;; i.e BranchIfFalse %x
            //  - BranchIf %x, 0x0, NotEqual  ;; i.e BranchIfTrue %x
            //
            // The zero may be on either side, since Equal and NotEqual are commutative.
            //
            // For example:
            //
            //  %x = Compare %a, %b, comp
            //  BranchIf %x, 0x0, NotEqual
            //
            //  =>
            //
            //  BranchIf %a, %b, comp

            static bool IsZeroBranch(Operation candidate, out Comparison kind)
            {
                kind = Comparison.Equal;

                if (candidate.Instruction != Instruction.BranchIf)
                {
                    return false;
                }

                Operand lhs = candidate.GetSource(0);
                Operand rhs = candidate.GetSource(1);
                Operand kindOp = candidate.GetSource(2);

                kind = (Comparison)kindOp.AsInt32();

                bool lhsIsZero = lhs.Kind == OperandKind.Constant && lhs.Value == 0;

                return lhsIsZero || (rhs.Kind == OperandKind.Constant && rhs.Value == 0);
            }

            Operand result = compOp.Destination;
            Operand cmpLhs = compOp.GetSource(0);
            Operand cmpRhs = compOp.GetSource(1);
            Operand cmpKindOp = compOp.GetSource(2);

            Comparison cmpKind = (Comparison)cmpKindOp.AsInt32();

            bool rewroteAny = false;

            foreach (Operation use in result.GetUses(ref buffer))
            {
                if (!IsZeroBranch(use, out Comparison branchKind))
                {
                    continue;
                }

                Comparison propagated;

                switch (branchKind)
                {
                    case Comparison.NotEqual:
                        propagated = cmpKind;
                        break;
                    case Comparison.Equal:
                        propagated = cmpKind.Invert();
                        break;
                    default:
                        // Any other branch condition is left alone.
                        continue;
                }

                use.SetSource(0, cmpLhs);
                use.SetSource(1, cmpRhs);
                use.SetSource(2, Const((int)propagated));

                rewroteAny = true;
            }

            return rewroteAny;
        }

        /// <summary>
        /// Replaces every source that refers to the copy's destination with the copy's source.
        /// </summary>
        private static void PropagateCopy(ref Span<Operation> buffer, Operation copyOp)
        {
            Operand replaced = copyOp.Destination;
            Operand replacement = copyOp.GetSource(0);

            foreach (Operation user in replaced.GetUses(ref buffer))
            {
                for (int slot = 0; slot < user.SourcesCount; slot++)
                {
                    if (user.GetSource(slot) == replaced)
                    {
                        user.SetSource(slot, replacement);
                    }
                }
            }
        }

        /// <summary>
        /// Unlinks <paramref name="node"/> from <paramref name="block"/> and clears all of its operands.
        /// </summary>
        private static void RemoveNode(BasicBlock block, Operation node)
        {
            block.Operations.Remove(node);

            // Clearing each source also drops this node from the use lists of those operands.
            for (int slot = 0; slot < node.SourcesCount; slot++)
            {
                node.SetSource(slot, default);
            }

            Debug.Assert(node.Destination == default || node.Destination.UsesCount == 0);

            node.Destination = default;
        }

        /// <summary>
        /// True when the node defines a single local variable that has no uses and the node has no side effects.
        /// </summary>
        private static bool IsUnused(Operation node)
        {
            if (!DestIsSingleLocalVar(node))
            {
                return false;
            }

            if (node.Destination.UsesCount != 0)
            {
                return false;
            }

            return !HasSideEffects(node);
        }

        /// <summary>True when the node has exactly one destination and it is a local variable.</summary>
        private static bool DestIsSingleLocalVar(Operation node)
            => node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;

        /// <summary>True for the instructions that must be kept even when their result is unused.</summary>
        private static bool HasSideEffects(Operation node)
        {
            switch (node.Instruction)
            {
                case Instruction.Call:
                case Instruction.Tailcall:
                case Instruction.CompareAndSwap:
                case Instruction.CompareAndSwap16:
                case Instruction.CompareAndSwap8:
                    return true;
                default:
                    return false;
            }
        }

        /// <summary>True when the operation is a Compare.</summary>
        private static bool IsPropagableCompare(Operation operation)
            => operation.Instruction == Instruction.Compare;

        /// <summary>True when the operation is a Copy whose destination and source have the same type.</summary>
        private static bool IsPropagableCopy(Operation operation)
            => operation.Instruction == Instruction.Copy
            && operation.Destination.Type == operation.GetSource(0).Type;
    }
}
// diff --git a/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs (new file)
using ARMeilleure.IntermediateRepresentation;
using System;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.CodeGen.Optimizations
{
    /// <summary>
    /// Algebraic simplification: turns operations with an identity or absorbing constant operand
    /// (x + 0, x * 1, x &amp; 0, ...) into plain copies.
    /// </summary>
    static class Simplification
    {
        /// <summary>
        /// Simplifies <paramref name="operation"/> in place when it matches one of the handled patterns;
        /// otherwise leaves it untouched.
        /// </summary>
        public static void RunPass(Operation operation)
        {
            switch (operation.Instruction)
            {
                case Instruction.Add:
                    // Additions that involve a relocatable constant are never simplified.
                    if (operation.GetSource(0).Relocatable ||
                        operation.GetSource(1).Relocatable)
                    {
                        break;
                    }

                    TryEliminateBinaryOpCommutative(operation, 0);
                    break;

                case Instruction.BitwiseAnd:
                    TryEliminateBitwiseAnd(operation);
                    break;

                case Instruction.BitwiseOr:
                    TryEliminateBitwiseOr(operation);
                    break;

                case Instruction.BitwiseExclusiveOr:
                    TryEliminateBitwiseExclusiveOr(operation);
                    break;

                case Instruction.ConditionalSelect:
                    TryEliminateConditionalSelect(operation);
                    break;

                case Instruction.Divide:
                    TryEliminateBinaryOpY(operation, 1);
                    break;

                case Instruction.Multiply:
                    TryEliminateBinaryOpCommutative(operation, 1);
                    break;

                case Instruction.ShiftLeft:
                case Instruction.ShiftRightSI:
                case Instruction.ShiftRightUI:
                case Instruction.Subtract:
                    TryEliminateBinaryOpY(operation, 0);
                    break;
            }
        }

        private static void TryEliminateBitwiseAnd(Operation operation)
        {
            // Try to recognize and optimize those 3 patterns (in order):
            // x & 0xFFFFFFFF == x,          0xFFFFFFFF & y == y,
            // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (IsConstEqual(x, AllOnes(x.Type)))
            {
                operation.TurnIntoCopy(y);
            }
            else if (IsConstEqual(y, AllOnes(y.Type)))
            {
                operation.TurnIntoCopy(x);
            }
            else if (IsConstEqual(x, 0) || IsConstEqual(y, 0))
            {
                operation.TurnIntoCopy(Const(x.Type, 0));
            }
        }

        private static void TryEliminateBitwiseOr(Operation operation)
        {
            // Try to recognize and optimize those 3 patterns (in order):
            // x | 0x00000000 == x,          0x00000000 | y == y,
            // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (IsConstEqual(x, 0))
            {
                operation.TurnIntoCopy(y);
            }
            else if (IsConstEqual(y, 0))
            {
                operation.TurnIntoCopy(x);
            }
            else if (IsConstEqual(x, AllOnes(x.Type)) || IsConstEqual(y, AllOnes(y.Type)))
            {
                // The result constant is built with the operand type, mirroring Const(x.Type, 0) in
                // TryEliminateBitwiseAnd. AllOnes(x.Type) succeeded above, so the type is I32 or I64.
                // NOTE(review): the previous Const(AllOnes(x.Type)) went through the ulong overload, which
                // presumably always yields an I64 constant, even for an I32 operation - confirm against
                // Operand.Factory.
                Operand allOnes = x.Type == OperandType.I32 ? Const(-1) : Const(-1L);

                operation.TurnIntoCopy(allOnes);
            }
        }

        private static void TryEliminateBitwiseExclusiveOr(Operation operation)
        {
            // Try to recognize and optimize those 2 patterns (in order):
            // x ^ y == 0x00000000 when x == y
            // 0x00000000 ^ y == y, x ^ 0x00000000 == x
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (x == y && x.Type.IsInteger())
            {
                operation.TurnIntoCopy(Const(x.Type, 0));
            }
            else
            {
                TryEliminateBinaryOpCommutative(operation, 0);
            }
        }

        /// <summary>
        /// Turns "x op y" into a copy of x when y is the integer constant <paramref name="comparand"/>.
        /// </summary>
        private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
        {
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (IsConstEqual(y, comparand))
            {
                operation.TurnIntoCopy(x);
            }
        }

        /// <summary>
        /// Turns "x op y" into a copy of the other operand when either operand is the integer constant
        /// <paramref name="comparand"/>; x is checked first.
        /// </summary>
        private static void TryEliminateBinaryOpCommutative(Operation operation, ulong comparand)
        {
            Operand x = operation.GetSource(0);
            Operand y = operation.GetSource(1);

            if (IsConstEqual(x, comparand))
            {
                operation.TurnIntoCopy(y);
            }
            else if (IsConstEqual(y, comparand))
            {
                operation.TurnIntoCopy(x);
            }
        }

        private static void TryEliminateConditionalSelect(Operation operation)
        {
            Operand cond = operation.GetSource(0);

            if (cond.Kind != OperandKind.Constant)
            {
                return;
            }

            // The condition is constant, we can turn it into a copy, and select
            // the source based on the condition value.
            int srcIndex = cond.Value != 0 ? 1 : 2;

            Operand source = operation.GetSource(srcIndex);

            operation.TurnIntoCopy(source);
        }

        /// <summary>
        /// True when <paramref name="operand"/> is an integer constant whose value equals <paramref name="comparand"/>.
        /// </summary>
        private static bool IsConstEqual(Operand operand, ulong comparand)
        {
            if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger())
            {
                return false;
            }

            return operand.Value == comparand;
        }

        /// <summary>
        /// Returns the all-bits-set value for I32 (32 bits) or I64 (64 bits).
        /// </summary>
        /// <exception cref="ArgumentException">The type is neither I32 nor I64.</exception>
        private static ulong AllOnes(OperandType type)
        {
            switch (type)
            {
                case OperandType.I32: return ~0U;
                case OperandType.I64: return ~0UL;
            }

            throw new ArgumentException("Invalid operand type \"" + type + "\".");
        }
    }
}
// diff --git a/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs (new file)
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

namespace ARMeilleure.CodeGen.Optimizations
{
    /// <summary>
    /// Replaces every Return that ends a block with a jump to one shared return block.
    /// </summary>
    static class TailMerge
    {
        /// <summary>
        /// Creates the shared return block, redirects each block ending in Return to it,
        /// appends it to the graph and updates the graph.
        /// </summary>
        public static void RunPass(in CompilerContext cctx)
        {
            ControlFlowGraph cfg = cctx.Cfg;

            BasicBlock sharedReturn = new(cfg.Blocks.Count);

            bool returnsNothing = cctx.FuncReturnType == OperandType.None;

            // When the function returns a value, every original return site copies its value into this local.
            Operand sharedValue;
            Operation sharedReturnOp;

            if (returnsNothing)
            {
                sharedValue = default;
                sharedReturnOp = Operation(Instruction.Return, default);
            }
            else
            {
                sharedValue = cfg.AllocateLocal(cctx.FuncReturnType);
                sharedReturnOp = Operation(Instruction.Return, default, sharedValue);
            }

            sharedReturn.Frequency = BasicBlockFrequency.Cold;
            sharedReturn.Operations.AddLast(sharedReturnOp);

            for (BasicBlock blk = cfg.Blocks.First; blk != null; blk = blk.ListNext)
            {
                Operation last = blk.Operations.Last;

                if (last == default || last.Instruction != Instruction.Return)
                {
                    continue;
                }

                blk.Operations.Remove(last);

                if (returnsNothing)
                {
                    PrepareMerge(blk, sharedReturn);

                    continue;
                }

                Operation copyOp = Operation(Instruction.Copy, sharedValue, last.GetSource(0));

                PrepareMerge(blk, sharedReturn).Append(copyOp);
            }

            cfg.Blocks.AddLast(sharedReturn);
            cfg.Update();
        }

        /// <summary>
        /// Links <paramref name="from"/> (or its only predecessor, when <paramref name="from"/> is empty)
        /// to <paramref name="to"/> and returns the block that now leads into <paramref name="to"/>.
        /// </summary>
        private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to)
        {
            BasicBlock solePred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null;

            // An empty block whose single predecessor has a single successor can be bypassed,
            // which avoids an unnecessary jump.
            bool canBypass = from.Operations.Count == 0 && solePred != null && solePred.SuccessorsCount == 1;

            if (!canBypass)
            {
                from.AddSuccessor(to);

                return from;
            }

            for (int slot = 0; slot < solePred.SuccessorsCount; slot++)
            {
                if (solePred.GetSuccessor(slot) == from)
                {
                    solePred.SetSuccessor(slot, to);
                }
            }

            // NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it.
            return solePred;
        }
    }
}

// diff --git a/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs (new file)
using ARMeilleure.IntermediateRepresentation;
using System;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

namespace ARMeilleure.CodeGen
{
    static class PreAllocatorCommon
    {
        /// <summary>
        /// Replaces <paramref name="dest"/> with <paramref name="value"/> in every operation that uses it,
        /// both as a direct source and as the base address or index of a memory operand source.
        /// </summary>
        /// <param name="buffer">Scratch storage passed to GetUses.</param>
        /// <param name="dest">Operand being replaced.</param>
        /// <param name="value">Operand that takes its place.</param>
        public static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value)
        {
            ReadOnlySpan<Operation> users = dest.GetUses(ref buffer);

            foreach (Operation user in users)
            {
                for (int slot = 0; slot < user.SourcesCount; slot++)
                {
                    Operand current = user.GetSource(slot);

                    if (current == dest)
                    {
                        user.SetSource(slot, value);

                        continue;
                    }

                    if (current.Kind != OperandKind.Memory)
                    {
                        continue;
                    }

                    MemoryOperand mem = current.GetMemory();

                    Operand baseAddr = mem.BaseAddress;
                    Operand index = mem.Index;

                    bool swapBase = baseAddr == dest;
                    bool swapIndex = index == dest;

                    if (!swapBase && !swapIndex)
                    {
                        continue;
                    }

                    // The source is replaced by a new memory operand that carries the substituted parts.
                    user.SetSource(slot, MemoryOp(
                        current.Type,
                        swapBase ? value : baseAddr,
                        swapIndex ? value : index,
                        mem.Scale,
                        mem.Displacement));
                }
            }
        }
    }
}

// diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs (new file; contents follow)
namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Immutable triple of values produced by a register allocation pass.
    /// </summary>
    readonly struct AllocationResult
    {
        /// <summary>Value supplied as <c>intUsedRegisters</c> at construction.</summary>
        public int IntUsedRegisters { get; }

        /// <summary>Value supplied as <c>vecUsedRegisters</c> at construction.</summary>
        public int VecUsedRegisters { get; }

        /// <summary>Value supplied as <c>spillRegionSize</c> at construction.</summary>
        public int SpillRegionSize { get; }

        public AllocationResult(int intUsedRegisters, int vecUsedRegisters, int spillRegionSize)
        {
            (IntUsedRegisters, VecUsedRegisters, SpillRegionSize) =
                (intUsedRegisters, vecUsedRegisters, spillRegionSize);
        }
    }
}
// diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs (new file)
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;

using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Collects the moves needed where a live interval was split (spills, register-to-register copies, fills)
    /// and turns them into an ordered list of operations.
    /// </summary>
    class CopyResolver
    {
        /// <summary>
        /// A set of register-to-register copies that is emitted as an ordered sequence of copies and swaps.
        /// </summary>
        private class ParallelCopy
        {
            private readonly struct Copy
            {
                public Register Dest { get; }
                public Register Source { get; }

                public OperandType Type { get; }

                public Copy(Register dest, Register source, OperandType type)
                {
                    (Dest, Source, Type) = (dest, source, type);
                }
            }

            private readonly List<Copy> _copies = new List<Copy>();

            public int Count
            {
                get { return _copies.Count; }
            }

            public ParallelCopy()
            {
            }

            public void AddCopy(Register dest, Register source, OperandType type)
            {
                Copy entry = new Copy(dest, source, type);

                _copies.Add(entry);
            }

            /// <summary>
            /// Appends to <paramref name="sequence"/> the copy and swap operations that perform all recorded copies.
            /// </summary>
            public void Sequence(List<Operation> sequence)
            {
                // whereIs[r]   : register currently holding the value that was originally in r.
                // comesFrom[d] : original source register of the copy whose destination is d.
                // copyType[d]  : operand type of the copy whose destination is d.
                var whereIs = new Dictionary<Register, Register>();
                var comesFrom = new Dictionary<Register, Register>();
                var copyType = new Dictionary<Register, OperandType>();

                var pending = new Queue<Register>();
                var ready = new Queue<Register>();

                foreach (Copy entry in _copies)
                {
                    whereIs[entry.Source] = entry.Source;
                    comesFrom[entry.Dest] = entry.Source;
                    copyType[entry.Dest] = entry.Type;

                    pending.Enqueue(entry.Dest);
                }

                foreach (Copy entry in _copies)
                {
                    // A destination that no copy reads from can be written right away.
                    if (!whereIs.ContainsKey(entry.Dest))
                    {
                        ready.Enqueue(entry.Dest);
                    }
                }

                while (pending.TryDequeue(out Register head))
                {
                    while (ready.TryDequeue(out Register target))
                    {
                        Register wanted = comesFrom[target];
                        Register holder = whereIs[wanted];

                        OperandType plainType = copyType[target];

                        EmitCopy(sequence, GetRegister(target, plainType), GetRegister(holder, plainType));

                        whereIs[wanted] = target;

                        if (wanted == holder && comesFrom.ContainsKey(wanted))
                        {
                            ready.Enqueue(wanted);
                        }
                    }

                    Register headSource = comesFrom[head];
                    Register headHolder = whereIs[headSource];

                    if (head == headHolder)
                    {
                        // The value is already in place.
                        continue;
                    }

                    // Swaps always operate on the widest type of the register class.
                    OperandType wideType = copyType[head].IsInteger() ? OperandType.I64 : OperandType.V128;

                    EmitXorSwap(sequence, GetRegister(head, wideType), GetRegister(headHolder, wideType));

                    whereIs[headSource] = head;

                    Register swapOther = headHolder;

                    if (head != whereIs[comesFrom[headHolder]])
                    {
                        // Find the other swap destination register: among the pending registers,
                        // pick the first one whose copy source currently lives in `head`.
                        foreach (Register candidate in pending)
                        {
                            if (head == whereIs[comesFrom[candidate]])
                            {
                                swapOther = candidate;

                                break;
                            }
                        }
                    }

                    // The value that was previously at `head` now lives on `headHolder` thanks to the swap,
                    // so the location used by the next copy of that value has to be updated.
                    whereIs[comesFrom[swapOther]] = headHolder;
                }
            }

            private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
            {
                Operation copy = Operation(Instruction.Copy, x, y);

                sequence.Add(copy);
            }

            /// <summary>
            /// Exchanges x and y with three exclusive-or operations.
            /// </summary>
            private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
            {
                sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y));
                sequence.Add(Operation(Instruction.BitwiseExclusiveOr, y, y, x));
                sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y));
            }
        }

        // All three containers are created lazily, on the first move of the matching kind.
        private Queue<Operation> _fillQueue;
        private Queue<Operation> _spillQueue;
        private ParallelCopy _parallelCopy;

        /// <summary>True once at least one fill, spill or copy has been recorded.</summary>
        public bool HasCopy { get; private set; }

        /// <summary>
        /// Records the move needed to go from <paramref name="left"/> to <paramref name="right"/>,
        /// two intervals of the same local variable.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// The intervals belong to different variables, or a stack-to-stack move would be required.
        /// </exception>
        public void AddSplit(LiveInterval left, LiveInterval right)
        {
            if (left.Local != right.Local)
            {
                throw new ArgumentException("Intervals of different variables are not allowed.");
            }

            OperandType type = left.Local.Type;

            switch (left.IsSpilled, right.IsSpilled)
            {
                case (true, false):
                    // Move from the stack to a register.
                    AddSplitFill(left, right, type);
                    break;

                case (false, true):
                    // Move from a register to the stack.
                    AddSplitSpill(left, right, type);
                    break;

                case (false, false) when left.Register != right.Register:
                    // Move from one register to another.
                    AddSplitCopy(left, right, type);
                    break;

                default:
                    if (left.SpillOffset != right.SpillOffset)
                    {
                        // This would be the stack-to-stack move case, but this is not supported.
                        throw new ArgumentException("Both intervals were spilled.");
                    }
                    break;
            }
        }

        private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
        {
            _fillQueue ??= new Queue<Operation>();

            _fillQueue.Enqueue(Operation(
                Instruction.Fill,
                GetRegister(right.Register, type),
                Const(left.SpillOffset)));

            HasCopy = true;
        }

        private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
        {
            _spillQueue ??= new Queue<Operation>();

            _spillQueue.Enqueue(Operation(
                Instruction.Spill,
                default,
                Const(right.SpillOffset),
                GetRegister(left.Register, type)));

            HasCopy = true;
        }

        private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
        {
            _parallelCopy ??= new ParallelCopy();

            _parallelCopy.AddCopy(right.Register, left.Register, type);

            HasCopy = true;
        }

        /// <summary>
        /// Drains the recorded moves into one array: spills first, then register copies, then fills.
        /// </summary>
        public Operation[] Sequence()
        {
            static void Drain(Queue<Operation> queue, List<Operation> into)
            {
                if (queue == null)
                {
                    return;
                }

                while (queue.TryDequeue(out Operation queued))
                {
                    into.Add(queued);
                }
            }

            var ordered = new List<Operation>();

            Drain(_spillQueue, ordered);

            if (_parallelCopy != null)
            {
                _parallelCopy.Sequence(ordered);
            }

            Drain(_fillQueue, ordered);

            return ordered.ToArray();
        }

        private static Operand GetRegister(Register reg, OperandType type)
        {
            return Register(reg.Index, reg.Type, type);
        }
    }
}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs new file mode 100644 index 00000000..25952c77 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs @@ -0,0 +1,454 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + class HybridAllocator : IRegisterAllocator + { + private readonly struct BlockInfo + { + public bool HasCall { get; } + + public int IntFixedRegisters { get; } + public int VecFixedRegisters { get; } + + public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters) + { + HasCall = hasCall; + IntFixedRegisters = intFixedRegisters; + VecFixedRegisters = vecFixedRegisters; + } + } + + private struct LocalInfo + { + public int Uses { get; set; } + public int UsesAllocated { get; set; } + public int Sequence { get; set; } + public Operand Temp { get; set; } + public Operand Register { get; set; } + public Operand SpillOffset { get; set; } + public OperandType Type { get; } + + private int _first; + private int _last; + + public bool IsBlockLocal => _first == _last; + + public LocalInfo(OperandType type, int uses, int blkIndex) + { + Uses = uses; + Type = type; + + UsesAllocated = 0; + Sequence = 0; + Temp = default; + Register = default; + SpillOffset = default; + + _first = -1; + _last = -1; + + SetBlockIndex(blkIndex); + } + + public void SetBlockIndex(int blkIndex) + { + if (_first == -1 || blkIndex < _first) + { + _first = blkIndex; + } + + if (_last == -1 || blkIndex > _last) + { + _last = blkIndex; + } + } + } + + private const int MaxIROperands = 4; + // The 
// "visited" state is stored in the MSB of the local's value.
        private const ulong VisitedMask = 1ul << 63;

        // Per-block and per-local bookkeeping; both arrays are (re)created by RunPass.
        private BlockInfo[] _blockInfo;
        private LocalInfo[] _localInfo;

        /// <summary>
        /// Returns true when the visited bit (MSB of the operand value) is set on <paramref name="local"/>.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static bool IsVisited(Operand local)
        {
            Debug.Assert(local.Kind == OperandKind.LocalVariable);

            return (local.GetValueUnsafe() & VisitedMask) != 0;
        }

        /// <summary>
        /// Sets the visited bit (MSB of the operand value) on <paramref name="local"/>.
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void SetVisited(Operand local)
        {
            Debug.Assert(local.Kind == OperandKind.LocalVariable);

            local.GetValueUnsafe() |= VisitedMask;
        }

        /// <summary>
        /// Returns a reference to the <see cref="LocalInfo"/> slot of an already numbered local.
        /// </summary>
        /// <remarks>
        /// Locals are numbered starting at 1 (see <c>NumberLocal(++localInfoCount)</c> in RunPass), hence the
        /// "- 1". The cast to uint drops the upper 32 bits of the value, which includes the visited bit.
        /// </remarks>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private ref LocalInfo GetLocalInfo(Operand local)
        {
            Debug.Assert(local.Kind == OperandKind.LocalVariable);
            Debug.Assert(IsVisited(local), "Local variable not visited. Used before defined?");

            return ref _localInfo[(uint)local.GetValueUnsafe() - 1];
        }

        /// <summary>
        /// Runs the allocator over <paramref name="cfg"/>, replacing local variable operands with registers
        /// or with spill temporaries surrounded by Fill/Spill operations.
        /// </summary>
        /// <param name="cfg">Control flow graph whose operations are rewritten in place.</param>
        /// <param name="stackAlloc">Allocator used to reserve stack space for locals that do not get a register.</param>
        /// <param name="regMasks">Available and caller saved register masks.</param>
        /// <returns>The integer/vector registers used and the total stack size reported by <paramref name="stackAlloc"/>.</returns>
        public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
        {
            int intUsedRegisters = 0;
            int vecUsedRegisters = 0;

            int intFreeRegisters = regMasks.IntAvailableRegisters;
            int vecFreeRegisters = regMasks.VecAvailableRegisters;

            _blockInfo = new BlockInfo[cfg.Blocks.Count];
            _localInfo = new LocalInfo[cfg.Blocks.Count * 3];

            int localInfoCount = 0;

            // First pass (reverse post order): number the locals on their first definition, record the block
            // index of every use/definition, and collect per-block call presence and fixed register masks.
            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
            {
                BasicBlock block = cfg.PostOrderBlocks[index];

                int intFixedRegisters = 0;
                int vecFixedRegisters = 0;

                bool hasCall = false;

                for (Operation node = block.Operations.First; node != default; node = node.ListNext)
                {
                    if (node.Instruction == Instruction.Call)
                    {
                        hasCall = true;
                    }

                    foreach (Operand source in node.SourcesUnsafe)
                    {
                        if (source.Kind == OperandKind.LocalVariable)
                        {
                            GetLocalInfo(source).SetBlockIndex(block.Index);
                        }
                        else if (source.Kind == OperandKind.Memory)
                        {
                            MemoryOperand memOp = source.GetMemory();

                            if (memOp.BaseAddress != default)
                            {
                                GetLocalInfo(memOp.BaseAddress).SetBlockIndex(block.Index);
                            }

                            if (memOp.Index != default)
                            {
                                GetLocalInfo(memOp.Index).SetBlockIndex(block.Index);
                            }
                        }
                    }

                    foreach (Operand dest in node.DestinationsUnsafe)
                    {
                        if (dest.Kind == OperandKind.LocalVariable)
                        {
                            if (IsVisited(dest))
                            {
                                GetLocalInfo(dest).SetBlockIndex(block.Index);
                            }
                            else
                            {
                                dest.NumberLocal(++localInfoCount);

                                // Grow before the slot is written below (numbers are 1-based).
                                if (localInfoCount > _localInfo.Length)
                                {
                                    Array.Resize(ref _localInfo, localInfoCount * 2);
                                }

                                SetVisited(dest);
                                GetLocalInfo(dest) = new LocalInfo(dest.Type, UsesCount(dest), block.Index);
                            }
                        }
                        else if (dest.Kind == OperandKind.Register)
                        {
                            // Registers written explicitly by the block are kept out of the free set for it.
                            if (dest.Type.IsInteger())
                            {
                                intFixedRegisters |= 1 << dest.GetRegister().Index;
                            }
                            else
                            {
                                vecFixedRegisters |= 1 << dest.GetRegister().Index;
                            }
                        }
                    }
                }

                _blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
            }

            // Incremented once per operation; used to tell whether LocalInfo.Temp was picked for the
            // operation currently being processed.
            int sequence = 0;

            // Second pass: assign registers or stack slots and rewrite the operands.
            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
            {
                BasicBlock block = cfg.PostOrderBlocks[index];

                ref BlockInfo blkInfo = ref _blockInfo[block.Index];

                int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
                int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;

                int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
                int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;

                int intSpillTempRegisters = SelectSpillTemps(
                    intCallerSavedRegisters & ~blkInfo.IntFixedRegisters,
                    intLocalFreeRegisters);
                int vecSpillTempRegisters = SelectSpillTemps(
                    vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters,
                    vecLocalFreeRegisters);

                // Spill temps and (on blocks with calls) caller saved registers are not handed out to locals.
                intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
                vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);

                for (Operation node = block.Operations.First; node != default; node = node.ListNext)
                {
                    int intLocalUse = 0;
                    int vecLocalUse = 0;

                    // Resolves a source local: returns its register (releasing it on the last use), or
                    // inserts a Fill into a spill temp before the current node and returns that temp.
                    Operand AllocateRegister(Operand local)
                    {
                        ref LocalInfo info = ref GetLocalInfo(local);

                        info.UsesAllocated++;

                        Debug.Assert(info.UsesAllocated <= info.Uses);

                        if (info.Register != default)
                        {
                            if (info.UsesAllocated == info.Uses)
                            {
                                Register reg = info.Register.GetRegister();

                                if (local.Type.IsInteger())
                                {
                                    intLocalFreeRegisters |= 1 << reg.Index;
                                }
                                else
                                {
                                    vecLocalFreeRegisters |= 1 << reg.Index;
                                }
                            }

                            return info.Register;
                        }
                        else
                        {
                            Operand temp = info.Temp;

                            if (temp == default || info.Sequence != sequence)
                            {
                                temp = local.Type.IsInteger()
                                    ? GetSpillTemp(local, intSpillTempRegisters, ref intLocalUse)
                                    : GetSpillTemp(local, vecSpillTempRegisters, ref vecLocalUse);

                                info.Sequence = sequence;
                                info.Temp = temp;
                            }

                            Operation fillOp = Operation(Instruction.Fill, temp, info.SpillOffset);

                            block.Operations.AddBefore(node, fillOp);

                            return temp;
                        }
                    }

                    bool folded = false;

                    // If operation is a copy of a local and that local is living on the stack, we turn the copy into
                    // a fill, instead of inserting a fill before it.
                    if (node.Instruction == Instruction.Copy)
                    {
                        Operand source = node.GetSource(0);

                        if (source.Kind == OperandKind.LocalVariable)
                        {
                            ref LocalInfo info = ref GetLocalInfo(source);

                            if (info.Register == default)
                            {
                                Operation fillOp = Operation(Instruction.Fill, node.Destination, info.SpillOffset);

                                block.Operations.AddBefore(node, fillOp);
                                block.Operations.Remove(node);

                                node = fillOp;

                                folded = true;
                            }
                        }
                    }

                    if (!folded)
                    {
                        foreach (ref Operand source in node.SourcesUnsafe)
                        {
                            if (source.Kind == OperandKind.LocalVariable)
                            {
                                source = AllocateRegister(source);
                            }
                            else if (source.Kind == OperandKind.Memory)
                            {
                                MemoryOperand memOp = source.GetMemory();

                                if (memOp.BaseAddress != default)
                                {
                                    memOp.BaseAddress = AllocateRegister(memOp.BaseAddress);
                                }

                                if (memOp.Index != default)
                                {
                                    memOp.Index = AllocateRegister(memOp.Index);
                                }
                            }
                        }
                    }

                    int intLocalAsg = 0;
                    int vecLocalAsg = 0;

                    foreach (ref Operand dest in node.DestinationsUnsafe)
                    {
                        if (dest.Kind != OperandKind.LocalVariable)
                        {
                            continue;
                        }

                        ref LocalInfo info = ref GetLocalInfo(dest);

                        // First time this local is seen in the second pass: decide where it lives.
                        if (info.UsesAllocated == 0)
                        {
                            int mask = dest.Type.IsInteger()
                                ? intLocalFreeRegisters
                                : vecLocalFreeRegisters;

                            if (info.IsBlockLocal && mask != 0)
                            {
                                int selectedReg = BitOperations.TrailingZeroCount(mask);

                                info.Register = Register(selectedReg, info.Type.ToRegisterType(), info.Type);

                                if (dest.Type.IsInteger())
                                {
                                    intLocalFreeRegisters &= ~(1 << selectedReg);
                                    intUsedRegisters |= 1 << selectedReg;
                                }
                                else
                                {
                                    vecLocalFreeRegisters &= ~(1 << selectedReg);
                                    vecUsedRegisters |= 1 << selectedReg;
                                }
                            }
                            else
                            {
                                info.Register = default;
                                info.SpillOffset = Const(stackAlloc.Allocate(dest.Type.GetSizeInBytes()));
                            }
                        }

                        info.UsesAllocated++;

                        Debug.Assert(info.UsesAllocated <= info.Uses);

                        if (info.Register != default)
                        {
                            dest = info.Register;
                        }
                        else
                        {
                            Operand temp = info.Temp;

                            if (temp == default || info.Sequence != sequence)
                            {
                                temp = dest.Type.IsInteger()
                                    ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
                                    : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);

                                info.Sequence = sequence;
                                info.Temp = temp;
                            }

                            dest = temp;

                            Operation spillOp = Operation(Instruction.Spill, default, info.SpillOffset, temp);

                            block.Operations.AddAfter(node, spillOp);

                            // Continue the iteration after the inserted spill.
                            node = spillOp;
                        }
                    }

                    sequence++;

                    intUsedRegisters |= intLocalAsg | intLocalUse;
                    vecUsedRegisters |= vecLocalAsg | vecLocalUse;
                }
            }

            return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
        }

        /// <summary>
        /// Picks up to MaxIROperands registers to be used as spill temporaries, taking the lowest set bits
        /// of <paramref name="mask0"/> first and then the lowest set bits of <paramref name="mask1"/>.
        /// </summary>
        /// <param name="mask0">Preferred candidates (the callers pass caller saved registers here).</param>
        /// <param name="mask1">Fallback candidates (the callers pass the block free registers here).</param>
        /// <returns>Bit mask with the selected registers.</returns>
        private static int SelectSpillTemps(int mask0, int mask1)
        {
            int selection = 0;
            int count = 0;

            void TakeLowestBits(int candidates)
            {
                while (count < MaxIROperands && candidates != 0)
                {
                    // Isolate the lowest set bit.
                    int lowest = candidates & -candidates;

                    selection |= lowest;

                    candidates &= ~lowest;

                    count++;
                }
            }

            TakeLowestBits(mask0);

            // Both masks may contain the same register. Skip what was already selected, otherwise the same
            // bit would be counted twice and fewer than "count" distinct temps would actually be reserved.
            TakeLowestBits(mask1 & ~selection);

            Debug.Assert(count == MaxIROperands, "No enough registers for spill temps.");

            return selection;
        }

        /// <summary>
        /// Takes the lowest register of <paramref name="freeMask"/> that is not yet in
        /// <paramref name="useMask"/>, marks it as used and returns it typed after <paramref name="local"/>.
        /// </summary>
        private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
        {
            int candidates = freeMask & ~useMask;

            // TrailingZeroCount(0) is 32 and "1 << 32" wraps around to bit 0 for int operands, so running
            // out of temps would silently hand out an invalid register. Catch that on debug builds.
            Debug.Assert(candidates != 0, "No free spill temp register left for this operation.");

            int selectedReg = BitOperations.TrailingZeroCount(candidates);

            useMask |= 1 << selectedReg;

            return Register(selectedReg, local.Type.ToRegisterType(), local.Type);
        }

        /// <summary>
        /// Total number of references to <paramref name="local"/>: assignments plus uses.
        /// </summary>
        private static int UsesCount(Operand local)
        {
            return local.AssignmentsCount + local.UsesCount;
        }
    }
}
// src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
using ARMeilleure.Translation;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Common entry point implemented by the register allocators.
    /// </summary>
    interface IRegisterAllocator
    {
        /// <summary>
        /// Runs the allocation pass over the given control flow graph.
        /// </summary>
        /// <param name="cfg">Control flow graph to process.</param>
        /// <param name="stackAlloc">Stack allocator made available to the pass.</param>
        /// <param name="regMasks">Register masks made available to the pass.</param>
        /// <returns>The result of the allocation.</returns>
        AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks);
    }
}
// src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Numerics;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    // Based on:
    // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
    // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
    class LinearScanAllocator : IRegisterAllocator
    {
        // Every operation takes InstructionGap positions on the interval position space.
        private const int InstructionGap = 2;
        private const int InstructionGapMask = InstructionGap - 1;

        // Start position of every block (filled by BuildIntervals).
        private HashSet<int> _blockEdges;
        private LiveRange[] _blockRanges;
        private BitMap[] _blockLiveIn;

        // Work list. The first (registersCount * 2) entries are the fixed register intervals.
        private List<LiveInterval> _intervals;

        // Snapshot of _intervals taken by NumberLocals, before any split child is inserted.
        private LiveInterval[] _parentIntervals;

        private List<(IntrusiveList<Operation>, Operation)> _operationNodes;
        private int _operationsCount;

        /// <summary>
        /// State shared by the allocation steps: masks, stack allocator, active/inactive sets and
        /// the registers used so far.
        /// </summary>
        private class AllocationContext
        {
            public RegisterMasks Masks { get; }

            public StackAllocator StackAlloc { get; }

            public BitMap Active { get; }
            public BitMap Inactive { get; }

            public int IntUsedRegisters { get; set; }
            public int VecUsedRegisters { get; set; }

            // Templates copied by GetFreePositions: int.MaxValue for available registers, 0 otherwise.
            private readonly int[] _intFreePositions;
            private readonly int[] _vecFreePositions;
            private readonly int _intFreePositionsCount;
            private readonly int _vecFreePositionsCount;

            public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount)
            {
                StackAlloc = stackAlloc;
                Masks = masks;

                Active = new BitMap(Allocators.Default, intervalsCount);
                Inactive = new BitMap(Allocators.Default, intervalsCount);

                PopulateFreePositions(RegisterType.Integer, out _intFreePositions, out _intFreePositionsCount);
                PopulateFreePositions(RegisterType.Vector, out _vecFreePositions, out _vecFreePositionsCount);

                void PopulateFreePositions(RegisterType type, out int[] positions, out int count)
                {
                    // Query the mask once and use it for both the count and the table.
                    int mask = masks.GetAvailableRegisters(type);

                    positions = new int[masks.RegistersCount];
                    count = BitOperations.PopCount((uint)mask);

                    for (int i = 0; i < positions.Length; i++)
                    {
                        if ((mask & (1 << i)) != 0)
                        {
                            positions[i] = int.MaxValue;
                        }
                    }
                }
            }

            /// <summary>
            /// Copies the initial free positions table of <paramref name="type"/> into
            /// <paramref name="positions"/> and returns the number of available registers.
            /// </summary>
            public void GetFreePositions(RegisterType type, in Span<int> positions, out int count)
            {
                if (type == RegisterType.Integer)
                {
                    _intFreePositions.CopyTo(positions);

                    count = _intFreePositionsCount;
                }
                else
                {
                    Debug.Assert(type == RegisterType.Vector);

                    _vecFreePositions.CopyTo(positions);

                    count = _vecFreePositionsCount;
                }
            }

            public void MoveActiveToInactive(int bit)
            {
                Move(Active, Inactive, bit);
            }

            public void MoveInactiveToActive(int bit)
            {
                Move(Inactive, Active, bit);
            }

            private static void Move(BitMap source, BitMap dest, int bit)
            {
                source.Clear(bit);

                dest.Set(bit);
            }
        }

        /// <summary>
        /// Runs the linear scan allocation over <paramref name="cfg"/>: numbers the locals, builds the live
        /// intervals, allocates every interval, replaces the locals with registers and inserts the copies
        /// required by split intervals.
        /// </summary>
        /// <param name="cfg">Control flow graph whose operations are rewritten in place.</param>
        /// <param name="stackAlloc">Allocator used for spilled intervals.</param>
        /// <param name="regMasks">Available and caller saved register masks.</param>
        /// <returns>The integer/vector registers used and the total stack size.</returns>
        public AllocationResult RunPass(
            ControlFlowGraph cfg,
            StackAllocator stackAlloc,
            RegisterMasks regMasks)
        {
            NumberLocals(cfg, regMasks.RegistersCount);

            var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);

            BuildIntervals(cfg, context);

            // _intervals may grow while iterating, since split children are inserted by the allocation.
            for (int index = 0; index < _intervals.Count; index++)
            {
                LiveInterval current = _intervals[index];

                if (current.IsEmpty)
                {
                    continue;
                }

                if (current.IsFixed)
                {
                    context.Active.Set(index);

                    if (current.IsFixedAndUsed)
                    {
                        if (current.Register.Type == RegisterType.Integer)
                        {
                            context.IntUsedRegisters |= 1 << current.Register.Index;
                        }
                        else /* if (interval.Register.Type == RegisterType.Vector) */
                        {
                            context.VecUsedRegisters |= 1 << current.Register.Index;
                        }
                    }

                    continue;
                }

                AllocateInterval(context, current, index, regMasks.RegistersCount);
            }

            // Skip the fixed register intervals at the start of the list.
            for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++)
            {
                if (!_intervals[index].IsSpilled)
                {
                    ReplaceLocalWithRegister(_intervals[index]);
                }
            }

            InsertSplitCopies();
            InsertSplitCopiesAtEdges(cfg);

            return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize);
        }

        /// <summary>
        /// Updates the active and inactive sets for the start position of <paramref name="current"/> and
        /// then allocates a register for it, spilling when no register is free.
        /// </summary>
        private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
        {
            // Check active intervals that already ended.
            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];

                interval.Forward(current.GetStart());

                if (interval.GetEnd() < current.GetStart())
                {
                    context.Active.Clear(iIndex);
                }
                else if (!interval.Overlaps(current.GetStart()))
                {
                    context.MoveActiveToInactive(iIndex);
                }
            }

            // Check inactive intervals that already ended or were reactivated.
            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];

                interval.Forward(current.GetStart());

                if (interval.GetEnd() < current.GetStart())
                {
                    context.Inactive.Clear(iIndex);
                }
                else if (interval.Overlaps(current.GetStart()))
                {
                    context.MoveInactiveToActive(iIndex);
                }
            }

            if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount))
            {
                AllocateRegWithSpill(context, current, cIndex, registersCount);
            }
        }

        /// <summary>
        /// Tries to give <paramref name="current"/> a register that is free at its start, splitting
        /// <paramref name="current"/> when the register is only free for part of its lifetime.
        /// </summary>
        /// <returns>True if a register was assigned, false if a spill is needed.</returns>
        private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
        {
            RegisterType regType = current.Local.Type.ToRegisterType();

            Span<int> freePositions = stackalloc int[registersCount];

            context.GetFreePositions(regType, freePositions, out int freePositionsCount);

            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];
                Register reg = interval.Register;

                if (reg.Type == regType)
                {
                    freePositions[reg.Index] = 0;
                    freePositionsCount--;
                }
            }

            // If all registers are already active, return early. No point in inspecting the inactive set to look for
            // holes.
            if (freePositionsCount == 0)
            {
                return false;
            }

            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];
                Register reg = interval.Register;

                ref int freePosition = ref freePositions[reg.Index];

                if (reg.Type == regType && freePosition != 0)
                {
                    int overlapPosition = interval.GetOverlapPosition(current);

                    if (overlapPosition != LiveInterval.NotFound && freePosition > overlapPosition)
                    {
                        freePosition = overlapPosition;
                    }
                }
            }

            int selectedReg = GetHighestValueIndex(freePositions);
            int selectedNextUse = freePositions[selectedReg];

            // Intervals start and end at odd positions, unless they span an entire
            // block, in this case they will have ranges at an even position.
            // When an interval is loaded from the stack to a register, we can only
            // do the split at an odd position, because otherwise the split interval
            // that is inserted on the list to be processed may clobber a register
            // used by the instruction at the same position as the split.
            // The problem only happens when an interval ends exactly at this instruction,
            // because otherwise they would interfere, and the register wouldn't be selected.
            // When the interval is aligned and the above happens, there's no problem as
            // the instruction that is actually with the last use is the one
            // before that position.
            selectedNextUse &= ~InstructionGapMask;

            if (selectedNextUse <= current.GetStart())
            {
                return false;
            }
            else if (selectedNextUse < current.GetEnd())
            {
                LiveInterval splitChild = current.Split(selectedNextUse);

                if (splitChild.UsesCount != 0)
                {
                    Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");

                    InsertInterval(splitChild, registersCount);
                }
                else
                {
                    Spill(context, splitChild);
                }
            }

            current.Register = new Register(selectedReg, regType);

            if (regType == RegisterType.Integer)
            {
                context.IntUsedRegisters |= 1 << selectedReg;
            }
            else /* if (regType == RegisterType.Vector) */
            {
                context.VecUsedRegisters |= 1 << selectedReg;
            }

            context.Active.Set(cIndex);

            return true;
        }

        /// <summary>
        /// Allocates <paramref name="current"/> when no register is free: either spills
        /// <paramref name="current"/> up to its first use, or takes the register whose next use is the
        /// farthest and splits/spills the intervals that were holding it.
        /// </summary>
        private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
        {
            RegisterType regType = current.Local.Type.ToRegisterType();

            Span<int> usePositions = stackalloc int[registersCount];
            Span<int> blockedPositions = stackalloc int[registersCount];

            context.GetFreePositions(regType, usePositions, out _);
            context.GetFreePositions(regType, blockedPositions, out _);

            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];
                Register reg = interval.Register;

                if (reg.Type == regType)
                {
                    ref int usePosition = ref usePositions[reg.Index];
                    ref int blockedPosition = ref blockedPositions[reg.Index];

                    if (interval.IsFixed)
                    {
                        usePosition = 0;
                        blockedPosition = 0;
                    }
                    else
                    {
                        int nextUse = interval.NextUseAfter(current.GetStart());

                        if (nextUse != LiveInterval.NotFound && usePosition > nextUse)
                        {
                            usePosition = nextUse;
                        }
                    }
                }
            }

            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];
                Register reg = interval.Register;

                if (reg.Type == regType)
                {
                    ref int usePosition = ref usePositions[reg.Index];
                    ref int blockedPosition = ref blockedPositions[reg.Index];

                    if (interval.IsFixed)
                    {
                        int overlapPosition = interval.GetOverlapPosition(current);

                        if (overlapPosition != LiveInterval.NotFound)
                        {
                            blockedPosition = Math.Min(blockedPosition, overlapPosition);
                            usePosition = Math.Min(usePosition, overlapPosition);
                        }
                    }
                    else if (interval.Overlaps(current))
                    {
                        int nextUse = interval.NextUseAfter(current.GetStart());

                        if (nextUse != LiveInterval.NotFound && usePosition > nextUse)
                        {
                            usePosition = nextUse;
                        }
                    }
                }
            }

            int selectedReg = GetHighestValueIndex(usePositions);
            int currentFirstUse = current.FirstUse();

            Debug.Assert(currentFirstUse >= 0, "Current interval has no uses.");

            if (usePositions[selectedReg] < currentFirstUse)
            {
                // All intervals on inactive and active are being used before current,
                // so spill the current interval.
                Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill a interval currently being used.");

                LiveInterval splitChild = current.Split(currentFirstUse);

                Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");

                InsertInterval(splitChild, registersCount);

                Spill(context, current);
            }
            else if (blockedPositions[selectedReg] > current.GetEnd())
            {
                // Spill made the register available for the entire current lifetime,
                // so we only need to split the intervals using the selected register.
                current.Register = new Register(selectedReg, regType);

                SplitAndSpillOverlappingIntervals(context, current, registersCount);

                context.Active.Set(cIndex);
            }
            else
            {
                // There are conflicts even after spill due to the use of fixed registers
                // that can't be spilled, so we need to also split current at the point of
                // the first fixed register use.
                current.Register = new Register(selectedReg, regType);

                int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask;

                Debug.Assert(splitPosition > current.GetStart(), "Trying to split a interval at a invalid position.");

                LiveInterval splitChild = current.Split(splitPosition);

                if (splitChild.UsesCount != 0)
                {
                    Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");

                    InsertInterval(splitChild, registersCount);
                }
                else
                {
                    Spill(context, splitChild);
                }

                SplitAndSpillOverlappingIntervals(context, current, registersCount);

                context.Active.Set(cIndex);
            }
        }

        /// <summary>
        /// Returns the index of the first element holding the highest value of <paramref name="span"/>
        /// (0 for an empty span). Stops early on int.MaxValue, as nothing can be higher.
        /// </summary>
        private static int GetHighestValueIndex(Span<int> span)
        {
            int highest = int.MinValue;

            int selected = 0;

            for (int index = 0; index < span.Length; index++)
            {
                int current = span[index];

                if (highest < current)
                {
                    highest = current;
                    selected = index;

                    if (current == int.MaxValue)
                    {
                        break;
                    }
                }
            }

            return selected;
        }

        /// <summary>
        /// Splits and spills every non fixed active interval, and every non fixed inactive interval that
        /// overlaps <paramref name="current"/>, currently assigned to the register of <paramref name="current"/>.
        /// </summary>
        private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount)
        {
            foreach (int iIndex in context.Active)
            {
                LiveInterval interval = _intervals[iIndex];

                if (!interval.IsFixed && interval.Register == current.Register)
                {
                    SplitAndSpillOverlappingInterval(context, current, interval, registersCount);

                    context.Active.Clear(iIndex);
                }
            }

            foreach (int iIndex in context.Inactive)
            {
                LiveInterval interval = _intervals[iIndex];

                if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
                {
                    SplitAndSpillOverlappingInterval(context, current, interval, registersCount);

                    context.Inactive.Clear(iIndex);
                }
            }
        }

        /// <summary>
        /// Splits <paramref name="interval"/> at the start of <paramref name="current"/> and spills the
        /// part that conflicts with it, re-queueing the remainder when it still has uses.
        /// </summary>
        private void SplitAndSpillOverlappingInterval(
            AllocationContext context,
            LiveInterval current,
            LiveInterval interval,
            int registersCount)
        {
            // If there's a next use after the start of the current interval,
            // we need to split the spilled interval twice, and re-insert it
            // on the "pending" list to ensure that it will get a new register
            // on that use position.
            int nextUse = interval.NextUseAfter(current.GetStart());

            LiveInterval splitChild;

            if (interval.GetStart() < current.GetStart())
            {
                splitChild = interval.Split(current.GetStart());
            }
            else
            {
                splitChild = interval;
            }

            // Same sentinel that the other NextUseAfter callers of this class compare against.
            if (nextUse != LiveInterval.NotFound)
            {
                Debug.Assert(nextUse > current.GetStart(), "Trying to spill a interval currently being used.");

                if (nextUse > splitChild.GetStart())
                {
                    LiveInterval right = splitChild.Split(nextUse);

                    Spill(context, splitChild);

                    splitChild = right;
                }

                InsertInterval(splitChild, registersCount);
            }
            else
            {
                Spill(context, splitChild);
            }
        }

        /// <summary>
        /// Inserts <paramref name="interval"/> on the work list, keeping the part of the list that follows
        /// the fixed register intervals ordered (ordering is given by the LiveInterval comparison).
        /// </summary>
        private void InsertInterval(LiveInterval interval, int registersCount)
        {
            Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses.");
            Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval.");
            Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");

            int startIndex = registersCount * 2;

            int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);

            // A negative result is the bitwise complement of the insertion point.
            if (insertIndex < 0)
            {
                insertIndex = ~insertIndex;
            }

            _intervals.Insert(insertIndex, interval);
        }

        /// <summary>
        /// Assigns a stack location to <paramref name="interval"/>, which must have no uses.
        /// </summary>
        private void Spill(AllocationContext context, LiveInterval interval)
        {
            Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval.");
            Debug.Assert(interval.UsesCount == 0, "Trying to spill a interval with uses.");

            // We first check if any of the siblings were spilled, if so we can reuse
            // the stack offset. Otherwise, we allocate a new space on the stack.
            // This prevents stack-to-stack copies being necessary for a split interval.
            if (!interval.TrySpillWithSiblingOffset())
            {
                interval.Spill(context.StackAlloc.Allocate(interval.Local.Type));
            }
        }

        /// <summary>
        /// Inserts the copies needed between consecutive split children that meet at a position
        /// that is not a block start.
        /// </summary>
        private void InsertSplitCopies()
        {
            Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>();

            CopyResolver GetCopyResolver(int position)
            {
                if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
                {
                    copyResolver = new CopyResolver();

                    copyResolvers.Add(position, copyResolver);
                }

                return copyResolver;
            }

            foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
            {
                LiveInterval previous = interval;

                foreach (LiveInterval splitChild in interval.SplitChildren())
                {
                    int splitPosition = splitChild.GetStart();

                    if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
                    {
                        GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
                    }

                    previous = splitChild;
                }
            }

            foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
            {
                CopyResolver copyResolver = kv.Value;

                if (!copyResolver.HasCopy)
                {
                    continue;
                }

                int splitPosition = kv.Key;

                (IntrusiveList<Operation> nodes, Operation node) = GetOperationNode(splitPosition);

                Operation[] sequence = copyResolver.Sequence();

                // The whole sequence goes before the operation at the split position, in order.
                nodes.AddBefore(node, sequence[0]);

                node = sequence[0];

                for (int index = 1; index < sequence.Length; index++)
                {
                    nodes.AddAfter(node, sequence[index]);

                    node = sequence[index];
                }
            }
        }

        /// <summary>
        /// Inserts the copies needed on control flow edges where a split interval is on different
        /// split children at the end of the predecessor and at the start of the successor.
        /// </summary>
        private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg)
        {
            // Captured before any edge is split; blocks with a higher index were created by this method.
            int blocksCount = cfg.Blocks.Count;

            bool IsSplitEdgeBlock(BasicBlock block)
            {
                return block.Index >= blocksCount;
            }

            // Reset iterators to beginning because GetSplitChild depends on the state of the iterator.
            foreach (LiveInterval interval in _intervals)
            {
                interval.Reset();
            }

            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
            {
                if (IsSplitEdgeBlock(block))
                {
                    continue;
                }

                bool hasSingleOrNoSuccessor = block.SuccessorsCount <= 1;

                for (int i = 0; i < block.SuccessorsCount; i++)
                {
                    BasicBlock successor = block.GetSuccessor(i);

                    int succIndex = successor.Index;

                    // If the current node is a split node, then the actual successor node
                    // (the successor before the split) should be right after it.
                    if (IsSplitEdgeBlock(successor))
                    {
                        succIndex = successor.GetSuccessor(0).Index;
                    }

                    CopyResolver copyResolver = null;

                    foreach (int iIndex in _blockLiveIn[succIndex])
                    {
                        LiveInterval interval = _parentIntervals[iIndex];

                        if (!interval.IsSplit)
                        {
                            continue;
                        }

                        int lEnd = _blockRanges[block.Index].End - 1;
                        int rStart = _blockRanges[succIndex].Start;

                        LiveInterval left = interval.GetSplitChild(lEnd);
                        LiveInterval right = interval.GetSplitChild(rStart);

                        if (left != default && right != default && left != right)
                        {
                            if (copyResolver == null)
                            {
                                copyResolver = new CopyResolver();
                            }

                            copyResolver.AddSplit(left, right);
                        }
                    }

                    if (copyResolver == null || !copyResolver.HasCopy)
                    {
                        continue;
                    }

                    Operation[] sequence = copyResolver.Sequence();

                    if (hasSingleOrNoSuccessor)
                    {
                        // The copies only affect this edge, so they can go at the end of the block.
                        foreach (Operation operation in sequence)
                        {
                            block.Append(operation);
                        }
                    }
                    else if (successor.Predecessors.Count == 1)
                    {
                        // The successor is only reached through this edge, so the copies can go at its start.
                        successor.Operations.AddFirst(sequence[0]);

                        Operation prependNode = sequence[0];

                        for (int index = 1; index < sequence.Length; index++)
                        {
                            Operation operation = sequence[index];

                            successor.Operations.AddAfter(prependNode, operation);

                            prependNode = operation;
                        }
                    }
                    else
                    {
                        // Split the critical edge.
                        BasicBlock splitBlock = cfg.SplitEdge(block, successor);

                        foreach (Operation operation in sequence)
                        {
                            splitBlock.Append(operation);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Replaces the local of <paramref name="current"/> with its register on every operation found
        /// at the use positions of the interval (sources, memory operand components and destinations).
        /// </summary>
        private void ReplaceLocalWithRegister(LiveInterval current)
        {
            Operand register = GetRegister(current);

            foreach (int usePosition in current.UsePositions())
            {
                (_, Operation operation) = GetOperationNode(usePosition);

                for (int index = 0; index < operation.SourcesCount; index++)
                {
                    Operand source = operation.GetSource(index);

                    if (source == current.Local)
                    {
                        operation.SetSource(index, register);
                    }
                    else if (source.Kind == OperandKind.Memory)
                    {
                        MemoryOperand memOp = source.GetMemory();

                        if (memOp.BaseAddress == current.Local)
                        {
                            memOp.BaseAddress = register;
                        }

                        if (memOp.Index == current.Local)
                        {
                            memOp.Index = register;
                        }
                    }
                }

                for (int index = 0; index < operation.DestinationsCount; index++)
                {
                    Operand dest = operation.GetDestination(index);

                    if (dest == current.Local)
                    {
                        operation.SetDestination(index, register);
                    }
                }
            }
        }

        /// <summary>
        /// Creates the register operand for a non spilled interval, typed after the interval local.
        /// </summary>
        private static Operand GetRegister(LiveInterval interval)
        {
            Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");

            return Operand.Factory.Register(
                interval.Register.Index,
                interval.Register.Type,
                interval.Local.Type);
        }

        /// <summary>
        /// Maps an interval position back to the operation (and its owning list) at that position.
        /// </summary>
        private (IntrusiveList<Operation>, Operation) GetOperationNode(int position)
        {
            return _operationNodes[position / InstructionGap];
        }

        /// <summary>
        /// Creates the fixed register intervals, then numbers every local on its first definition
        /// (blocks visited in reverse post order) and creates one interval per local. Also fills the
        /// position to operation table and the total operations count.
        /// </summary>
        private void NumberLocals(ControlFlowGraph cfg, int registersCount)
        {
            _operationNodes = new List<(IntrusiveList<Operation>, Operation)>();
            _intervals = new List<LiveInterval>();

            // Interleaved as integer, vector, integer, ... which is the layout GetRegisterId relies on.
            for (int index = 0; index < registersCount; index++)
            {
                _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
                _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
            }

            // The "visited" state is stored in the MSB of the local's value.
            const ulong VisitedMask = 1ul << 63;

            bool IsVisited(Operand local)
            {
                return (local.GetValueUnsafe() & VisitedMask) != 0;
            }

            void SetVisited(Operand local)
            {
                local.GetValueUnsafe() |= VisitedMask;
            }

            _operationsCount = 0;

            for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
            {
                BasicBlock block = cfg.PostOrderBlocks[index];

                for (Operation node = block.Operations.First; node != default; node = node.ListNext)
                {
                    _operationNodes.Add((block.Operations, node));

                    for (int i = 0; i < node.DestinationsCount; i++)
                    {
                        Operand dest = node.GetDestination(i);

                        if (dest.Kind == OperandKind.LocalVariable && !IsVisited(dest))
                        {
                            // The local number is the index of its interval on _intervals.
                            dest.NumberLocal(_intervals.Count);

                            _intervals.Add(new LiveInterval(dest));

                            SetVisited(dest);
                        }
                    }
                }

                _operationsCount += block.Operations.Count * InstructionGap;

                if (block.Operations.Count == 0)
                {
                    // Pretend we have a dummy instruction on the empty block.
                    _operationNodes.Add((default, default));

                    _operationsCount += InstructionGap;
                }
            }

            _parentIntervals = _intervals.ToArray();
        }

        /// <summary>
        /// Computes the per-block gen/kill sets, iterates the live-in/live-out sets until they stop
        /// changing, and then builds the ranges and use positions of every interval by walking the
        /// blocks and their operations backwards.
        /// </summary>
        private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context)
        {
            _blockRanges = new LiveRange[cfg.Blocks.Count];

            int mapSize = _intervals.Count;

            BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count];
            BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count];

            // Compute local live sets.
            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
            {
                BitMap liveGen = new BitMap(Allocators.Default, mapSize);
                BitMap liveKill = new BitMap(Allocators.Default, mapSize);

                for (Operation node = block.Operations.First; node != default; node = node.ListNext)
                {
                    for (int i = 0; i < node.SourcesCount; i++)
                    {
                        VisitSource(node.GetSource(i));
                    }

                    for (int i = 0; i < node.DestinationsCount; i++)
                    {
                        VisitDestination(node.GetDestination(i));
                    }

                    void VisitSource(Operand source)
                    {
                        if (IsLocalOrRegister(source.Kind))
                        {
                            int id = GetOperandId(source);

                            // Only a read that is not preceded by a write on this block is a "gen".
                            if (!liveKill.IsSet(id))
                            {
                                liveGen.Set(id);
                            }
                        }
                        else if (source.Kind == OperandKind.Memory)
                        {
                            MemoryOperand memOp = source.GetMemory();

                            if (memOp.BaseAddress != default)
                            {
                                VisitSource(memOp.BaseAddress);
                            }

                            if (memOp.Index != default)
                            {
                                VisitSource(memOp.Index);
                            }
                        }
                    }

                    void VisitDestination(Operand dest)
                    {
                        liveKill.Set(GetOperandId(dest));
                    }
                }

                blkLiveGen[block.Index] = liveGen;
                blkLiveKill[block.Index] = liveKill;
            }

            // Compute global live sets.
            BitMap[] blkLiveIn = new BitMap[cfg.Blocks.Count];
            BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count];

            for (int index = 0; index < cfg.Blocks.Count; index++)
            {
                blkLiveIn[index] = new BitMap(Allocators.Default, mapSize);
                blkLiveOut[index] = new BitMap(Allocators.Default, mapSize);
            }

            bool modified;

            do
            {
                modified = false;

                for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
                {
                    BasicBlock block = cfg.PostOrderBlocks[index];

                    BitMap liveOut = blkLiveOut[block.Index];

                    for (int i = 0; i < block.SuccessorsCount; i++)
                    {
                        BasicBlock succ = block.GetSuccessor(i);

                        modified |= liveOut.Set(blkLiveIn[succ.Index]);
                    }

                    BitMap liveIn = blkLiveIn[block.Index];

                    // liveIn = (liveOut - kill) + gen
                    liveIn.Set(liveOut);
                    liveIn.Clear(blkLiveKill[block.Index]);
                    liveIn.Set(blkLiveGen[block.Index]);
                }
            }
            while (modified);

            _blockLiveIn = blkLiveIn;

            _blockEdges = new HashSet<int>();

            // Compute lifetime intervals.
            int operationPos = _operationsCount;

            for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
            {
                BasicBlock block = cfg.PostOrderBlocks[index];

                // We handle empty blocks by pretending they have a dummy instruction,
                // because otherwise the block would have the same start and end position,
                // and this is not valid.
                int instCount = Math.Max(block.Operations.Count, 1);

                int blockStart = operationPos - instCount * InstructionGap;
                int blockEnd = operationPos;

                _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd);

                _blockEdges.Add(blockStart);

                BitMap liveOut = blkLiveOut[block.Index];

                // Live-out values start covering the whole block; definitions shorten that below.
                foreach (int id in liveOut)
                {
                    _intervals[id].AddRange(blockStart, blockEnd);
                }

                if (block.Operations.Count == 0)
                {
                    operationPos -= InstructionGap;

                    continue;
                }

                for (Operation node = block.Operations.Last; node != default; node = node.ListPrevious)
                {
                    operationPos -= InstructionGap;

                    for (int i = 0; i < node.DestinationsCount; i++)
                    {
                        VisitDestination(node.GetDestination(i));
                    }

                    for (int i = 0; i < node.SourcesCount; i++)
                    {
                        VisitSource(node.GetSource(i));
                    }

                    if (node.Instruction == Instruction.Call)
                    {
                        AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
                        AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
                    }

                    void VisitSource(Operand source)
                    {
                        if (IsLocalOrRegister(source.Kind))
                        {
                            LiveInterval interval = _intervals[GetOperandId(source)];

                            interval.AddRange(blockStart, operationPos + 1);
                            interval.AddUsePosition(operationPos);
                        }
                        else if (source.Kind == OperandKind.Memory)
                        {
                            MemoryOperand memOp = source.GetMemory();

                            if (memOp.BaseAddress != default)
                            {
                                VisitSource(memOp.BaseAddress);
                            }

                            if (memOp.Index != default)
                            {
                                VisitSource(memOp.Index);
                            }
                        }
                    }

                    void VisitDestination(Operand dest)
                    {
                        LiveInterval interval = _intervals[GetOperandId(dest)];

                        if (interval.IsFixed)
                        {
                            interval.IsFixedAndUsed = true;
                        }

                        // Definitions take the odd position following the operation position.
                        interval.SetStart(operationPos + 1);
                        interval.AddUsePosition(operationPos + 1);
                    }
                }
            }

            foreach (LiveInterval interval in _parentIntervals)
            {
                interval.Reset();
            }
        }

        /// <summary>
        /// Adds a range covering the call at <paramref name="operationPos"/> to the fixed interval of
        /// every register set on <paramref name="mask"/>.
        /// </summary>
        private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
        {
            while (mask != 0)
            {
                int regIndex = BitOperations.TrailingZeroCount(mask);

                Register callerSavedReg = new Register(regIndex, regType);

                LiveInterval interval = _intervals[GetRegisterId(callerSavedReg)];

                interval.AddRange(operationPos + 1, operationPos + InstructionGap);

                mask &= ~(1 << regIndex);
            }
        }

        /// <summary>
        /// Returns the index on the intervals list for a local variable or register operand.
        /// </summary>
        /// <exception cref="ArgumentException">The operand is neither a local variable nor a register.</exception>
        private static int GetOperandId(Operand operand)
        {
            if (operand.Kind == OperandKind.LocalVariable)
            {
                return operand.GetLocalNumber();
            }
            else if (operand.Kind == OperandKind.Register)
            {
                return GetRegisterId(operand.GetRegister());
            }
            else
            {
                throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
            }
        }

        /// <summary>
        /// Returns the index of the fixed interval of <paramref name="register"/>: index * 2 for
        /// integer registers, index * 2 + 1 for vector registers.
        /// </summary>
        private static int GetRegisterId(Register register)
        {
            return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0);
        }

        private static bool IsLocalOrRegister(OperandKind kind)
        {
            return kind == OperandKind.LocalVariable ||
                   kind == OperandKind.Register;
        }
    }
}
// File: src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
using ARMeilleure.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Diagnostics;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Handle to the liveness information of one local variable or fixed register: a singly linked list
    /// of <see cref="LiveRange"/>, the positions where it is used, its assigned register or spill offset,
    /// and the intervals that were split off from it.
    /// </summary>
    /// <remarks>
    /// The struct only wraps a pointer to its backing data, so copies of a <see cref="LiveInterval"/>
    /// alias the same state, and equality is pointer identity. <c>default</c> is the "null" interval.
    /// </remarks>
    unsafe readonly struct LiveInterval : IComparable<LiveInterval>, IEquatable<LiveInterval>
    {
        /// <summary>Value returned by the search methods when no position is found.</summary>
        public const int NotFound = -1;

        private struct Data
        {
            public int End;
            public int SpillOffset;

            public LiveRange FirstRange;
            public LiveRange PrevRange;
            public LiveRange CurrRange;

            public LiveInterval Parent;

            public UseList Uses;
            public LiveIntervalList Children;

            public Operand Local;
            public Register Register;

            public bool IsFixed;
            public bool IsFixedAndUsed;
        }

        private readonly Data* _data;

        private ref int End => ref _data->End;
        private ref LiveRange FirstRange => ref _data->FirstRange;
        private ref LiveRange CurrRange => ref _data->CurrRange;
        private ref LiveRange PrevRange => ref _data->PrevRange;
        private ref LiveInterval Parent => ref _data->Parent;
        private ref UseList Uses => ref _data->Uses;
        private ref LiveIntervalList Children => ref _data->Children;

        public Operand Local => _data->Local;
        public ref Register Register => ref _data->Register;
        public ref int SpillOffset => ref _data->SpillOffset;

        public bool IsFixed => _data->IsFixed;
        public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed;
        public bool IsEmpty => FirstRange == default;
        public bool IsSplit => Children.Count != 0;
        public bool IsSpilled => SpillOffset != -1;

        public int UsesCount => Uses.Count;

        /// <summary>
        /// Allocates a new, empty, non-fixed interval.
        /// </summary>
        /// <param name="local">Operand stored in <see cref="Local"/>.</param>
        /// <param name="parent">
        /// Interval this one was split from; when <c>default</c>, the new interval is its own parent.
        /// </param>
        public LiveInterval(Operand local = default, LiveInterval parent = default)
        {
            _data = Allocators.LiveIntervals.Allocate<Data>();
            *_data = default;

            _data->IsFixed = false;
            _data->Local = local;

            Parent = parent == default ? this : parent;
            Uses = new UseList();
            Children = new LiveIntervalList();

            FirstRange = default;
            CurrRange = default;
            PrevRange = default;

            // -1 means "not spilled", see IsSpilled.
            SpillOffset = -1;
        }

        /// <summary>
        /// Allocates a new, empty, fixed interval bound to <paramref name="register"/>.
        /// </summary>
        public LiveInterval(Register register) : this(local: default, parent: default)
        {
            _data->IsFixed = true;

            Register = register;
        }

        /// <summary>
        /// Rewinds the range cursor so that the current range is the first range again.
        /// </summary>
        public void Reset()
        {
            PrevRange = default;
            CurrRange = FirstRange;
        }

        /// <summary>
        /// Advances the range cursor until the current range contains <paramref name="position"/>,
        /// starts at or after it, or the end of the range list is reached.
        /// </summary>
        public void Forward(int position)
        {
            LiveRange prev = PrevRange;
            LiveRange curr = CurrRange;

            while (curr != default && curr.Start < position && !curr.Overlaps(position))
            {
                prev = curr;
                curr = curr.Next;
            }

            PrevRange = prev;
            CurrRange = curr;
        }

        /// <summary>
        /// Gets the start of the first range. The interval must not be empty.
        /// </summary>
        public int GetStart()
        {
            Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have a start position.");

            return FirstRange.Start;
        }

        /// <summary>
        /// Moves the start of the first range to <paramref name="position"/>, or, when the interval is
        /// empty, creates a one position long range starting there.
        /// </summary>
        public void SetStart(int position)
        {
            if (FirstRange != default)
            {
                Debug.Assert(position != FirstRange.End);

                FirstRange.Start = position;
            }
            else
            {
                FirstRange = new LiveRange(position, position + 1);
                End = position + 1;
            }
        }

        /// <summary>
        /// Gets the largest end position recorded for this interval. The interval must not be empty.
        /// </summary>
        public int GetEnd()
        {
            Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have an end position.");

            return End;
        }

        /// <summary>
        /// Adds the range [<paramref name="start"/>, <paramref name="end"/>) to the interval.
        /// </summary>
        /// <remarks>
        /// The new range is either merged into the current first range or linked in front of it; the rest
        /// of the list is not visited, so callers are expected to add ranges that do not overlap anything
        /// past the first range (this is only checked by a debug assertion).
        /// </remarks>
        public void AddRange(int start, int end)
        {
            Debug.Assert(start < end, $"Invalid range start position {start}, {end}");

            if (FirstRange != default)
            {
                // If the new range ends exactly where the first range starts, extend the first range backwards.
                if (end == FirstRange.Start)
                {
                    FirstRange.Start = start;

                    return;
                }
                // If the new range overlaps the first range, grow the first range to cover both.
                else if (FirstRange.Overlaps(start, end))
                {
                    FirstRange.Start = Math.Min(FirstRange.Start, start);
                    FirstRange.End = Math.Max(FirstRange.End, end);
                    End = Math.Max(End, end);

                    Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next));
                    return;
                }
            }

            FirstRange = new LiveRange(start, end, FirstRange);
            End = Math.Max(End, end);

            Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next));
        }

        /// <summary>
        /// Records a use of the interval at <paramref name="position"/>.
        /// </summary>
        public void AddUsePosition(int position)
        {
            Uses.Add(position);
        }

        /// <summary>
        /// Checks whether any range from the cursor onwards contains <paramref name="position"/>.
        /// Ranges before the cursor are not considered.
        /// </summary>
        public bool Overlaps(int position)
        {
            LiveRange curr = CurrRange;

            while (curr != default && curr.Start <= position)
            {
                if (curr.Overlaps(position))
                {
                    return true;
                }

                curr = curr.Next;
            }

            return false;
        }

        /// <summary>
        /// Checks whether this interval and <paramref name="other"/> overlap, starting from both cursors.
        /// </summary>
        public bool Overlaps(LiveInterval other)
        {
            return GetOverlapPosition(other) != NotFound;
        }

        /// <summary>
        /// Walks the ranges of both intervals, starting at their cursors, looking for an overlap.
        /// </summary>
        /// <returns>
        /// The start of the first range of this interval that overlaps a range of
        /// <paramref name="other"/>, or <see cref="NotFound"/> if there is none.
        /// </returns>
        public int GetOverlapPosition(LiveInterval other)
        {
            LiveRange a = CurrRange;
            LiveRange b = other.CurrRange;

            while (a != default)
            {
                // Skip the ranges of "other" that start before "a", checking each one on the way.
                while (b != default && b.Start < a.Start)
                {
                    if (a.Overlaps(b))
                    {
                        return a.Start;
                    }

                    b = b.Next;
                }

                if (b == default)
                {
                    break;
                }
                else if (a.Overlaps(b))
                {
                    return a.Start;
                }

                a = a.Next;
            }

            return NotFound;
        }

        /// <summary>
        /// Gets the intervals that were split off from this interval's parent.
        /// </summary>
        public ReadOnlySpan<LiveInterval> SplitChildren()
        {
            return Parent.Children.Span;
        }

        /// <summary>
        /// Gets the recorded use positions, in the order kept by <see cref="UseList"/>.
        /// </summary>
        public ReadOnlySpan<int> UsePositions()
        {
            return Uses.Span;
        }

        /// <summary>
        /// Gets the first use position as reported by <see cref="UseList.FirstUse"/>.
        /// </summary>
        public int FirstUse()
        {
            return Uses.FirstUse;
        }

        /// <summary>
        /// Gets the next use position as reported by <see cref="UseList.NextUse(int)"/>.
        /// </summary>
        public int NextUseAfter(int position)
        {
            return Uses.NextUse(position);
        }

        /// <summary>
        /// Splits the interval at <paramref name="position"/>. This interval keeps the ranges and uses
        /// before the position; the returned interval receives the rest and is registered as a split
        /// child of the parent.
        /// </summary>
        /// <remarks>
        /// The position must leave at least one range on each side of the split; both halves being
        /// non-empty is asserted in debug builds.
        /// </remarks>
        /// <returns>The new interval covering the part at and after <paramref name="position"/>.</returns>
        public LiveInterval Split(int position)
        {
            LiveInterval result = new(Local, Parent);
            result.End = End;

            LiveRange prev = PrevRange;
            LiveRange curr = CurrRange;

            while (curr != default && curr.Start < position && !curr.Overlaps(position))
            {
                prev = curr;
                curr = curr.Next;
            }

            // A null range handle would be dereferenced just below, so state the precondition up front.
            Debug.Assert(curr != default, "Split position is past the last range of the interval.");

            if (curr.Start >= position)
            {
                // The position falls in a hole (or exactly on a range start): cut the list between ranges.
                Debug.Assert(prev != default, "Split position is not after the start of the interval.");

                prev.Next = default;

                result.FirstRange = curr;

                End = prev.End;
            }
            else
            {
                // The position falls inside "curr": cut that range in two.
                result.FirstRange = new LiveRange(position, curr.End, curr.Next);

                curr.End = position;
                curr.Next = default;

                End = curr.End;
            }

            result.Uses = Uses.Split(position);

            AddSplitChild(result);

            Debug.Assert(!IsEmpty, "Left interval is empty after split.");
            Debug.Assert(!result.IsEmpty, "Right interval is empty after split.");

            // Make sure the iterator in the new split is pointing to the start.
            result.Reset();

            return result;
        }

        private void AddSplitChild(LiveInterval child)
        {
            Debug.Assert(!child.IsEmpty, "Trying to insert an empty interval.");

            Parent.Children.Add(child);
        }

        /// <summary>
        /// Finds the interval (this one or one of the split children) that is live at
        /// <paramref name="position"/>.
        /// </summary>
        /// <returns>The matching interval, or <c>default</c> if none covers the position.</returns>
        public LiveInterval GetSplitChild(int position)
        {
            if (Overlaps(position))
            {
                return this;
            }

            foreach (LiveInterval splitChild in SplitChildren())
            {
                if (splitChild.Overlaps(position))
                {
                    return splitChild;
                }
                else if (splitChild.GetStart() > position)
                {
                    // Children are kept ordered by start position, so no later child can match.
                    break;
                }
            }

            return default;
        }

        /// <summary>
        /// Reuses the spill offset of the first split child that is already spilled, if any.
        /// </summary>
        /// <returns>True if a spilled split child was found and its offset was taken.</returns>
        public bool TrySpillWithSiblingOffset()
        {
            foreach (LiveInterval splitChild in SplitChildren())
            {
                if (splitChild.IsSpilled)
                {
                    Spill(splitChild.SpillOffset);

                    return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Marks the interval as spilled at the given offset.
        /// </summary>
        public void Spill(int offset)
        {
            SpillOffset = offset;
        }

        /// <summary>
        /// Orders intervals by start position. Empty intervals compare equal to everything.
        /// </summary>
        public int CompareTo(LiveInterval interval)
        {
            if (FirstRange == default || interval.FirstRange == default)
            {
                return 0;
            }

            return GetStart().CompareTo(interval.GetStart());
        }

        /// <summary>
        /// Two handles are equal when they point to the same backing data.
        /// </summary>
        public bool Equals(LiveInterval interval)
        {
            return interval._data == _data;
        }

        public override bool Equals(object obj)
        {
            return obj is LiveInterval interval && Equals(interval);
        }

        public static bool operator ==(LiveInterval a, LiveInterval b)
        {
            return a.Equals(b);
        }

        public static bool operator !=(LiveInterval a, LiveInterval b)
        {
            return !a.Equals(b);
        }

        public override int GetHashCode()
        {
            return HashCode.Combine((IntPtr)_data);
        }

        /// <summary>
        /// Lists the ranges from the cursor onwards; the range at the cursor is prefixed with "*".
        /// </summary>
        public override string ToString()
        {
            // Iterator local functions cannot capture "this" of a struct, so work on a copy of the handle.
            LiveInterval self = this;

            IEnumerable<string> GetRanges()
            {
                LiveRange curr = self.CurrRange;

                while (curr != default)
                {
                    if (curr == self.CurrRange)
                    {
                        yield return "*" + curr;
                    }
                    else
                    {
                        yield return curr.ToString();
                    }

                    curr = curr.Next;
                }
            }

            return string.Join(", ", GetRanges());
        }
    }
}
// File: src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs
using System;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Growable list of <see cref="LiveInterval"/> kept in ascending order of interval start position.
    /// </summary>
    unsafe struct LiveIntervalList
    {
        private LiveInterval* _items;
        private int _count;
        private int _capacity;

        /// <summary>Number of intervals currently stored.</summary>
        public int Count => _count;

        /// <summary>View over the stored intervals, in ascending start order.</summary>
        public Span<LiveInterval> Span => new(_items, _count);

        /// <summary>
        /// Inserts <paramref name="interval"/> after every stored interval whose start is less than or
        /// equal to its own start, growing the backing buffer when it is full.
        /// </summary>
        /// <param name="interval">Interval to insert; its start position is read with GetStart().</param>
        public void Add(LiveInterval interval)
        {
            if (_capacity < _count + 1)
            {
                // Capture the current contents before the buffer pointer is replaced.
                Span<LiveInterval> previous = Span;

                _capacity = Math.Max(4, _capacity * 2);
                _items = Allocators.References.Allocate<LiveInterval>((uint)_capacity);

                previous.CopyTo(Span);
            }

            int start = interval.GetStart();
            int slot = _count;

            // Shift the intervals that start later one place up, walking from the back.
            for (; slot > 0 && _items[slot - 1].GetStart() > start; slot--)
            {
                _items[slot] = _items[slot - 1];
            }

            _items[slot] = interval;
            _count++;
        }
    }
}
// File: src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
using System;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Handle to one node of a singly linked list of half-open position ranges [Start, End).
    /// </summary>
    /// <remarks>
    /// The struct only wraps a pointer to its backing data: copies alias the same node, equality is
    /// pointer identity and <c>default</c> acts as the end-of-list marker.
    /// </remarks>
    unsafe readonly struct LiveRange : IEquatable<LiveRange>
    {
        private struct Data
        {
            public int Start;
            public int End;
            public LiveRange Next;
        }

        private readonly Data* _data;

        /// <summary>First position covered by the range (inclusive).</summary>
        public ref int Start => ref _data->Start;

        /// <summary>Position where the range stops (exclusive).</summary>
        public ref int End => ref _data->End;

        /// <summary>Following node of the list, or <c>default</c>.</summary>
        public ref LiveRange Next => ref _data->Next;

        /// <summary>
        /// Allocates a new node covering [<paramref name="start"/>, <paramref name="end"/>) and links it
        /// in front of <paramref name="next"/>.
        /// </summary>
        public LiveRange(int start, int end, LiveRange next = default)
        {
            _data = Allocators.LiveRanges.Allocate<Data>();

            _data->Start = start;
            _data->End = end;
            _data->Next = next;
        }

        /// <summary>Checks whether this range intersects [<paramref name="start"/>, <paramref name="end"/>).</summary>
        public bool Overlaps(int start, int end) => Start < end && start < End;

        /// <summary>Checks whether this range intersects <paramref name="range"/>.</summary>
        public bool Overlaps(LiveRange range) => Overlaps(range.Start, range.End);

        /// <summary>Checks whether <paramref name="position"/> lies inside this range.</summary>
        public bool Overlaps(int position) => Start <= position && position < End;

        /// <summary>Two handles are equal when they point to the same node.</summary>
        public bool Equals(LiveRange range) => _data == range._data;

        public override bool Equals(object obj) => obj is LiveRange other && Equals(other);

        public static bool operator ==(LiveRange a, LiveRange b) => a._data == b._data;

        public static bool operator !=(LiveRange a, LiveRange b) => a._data != b._data;

        public override int GetHashCode() => HashCode.Combine((IntPtr)_data);

        public override string ToString() => $"[{Start}, {End})";
    }
}
// File: src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
using ARMeilleure.IntermediateRepresentation;
using System;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Immutable bundle of the register bit masks (available, caller saved and callee saved, for the
    /// integer and vector register files) plus the register count handed to the register allocator.
    /// </summary>
    readonly struct RegisterMasks
    {
        public int IntAvailableRegisters { get; }
        public int VecAvailableRegisters { get; }
        public int IntCallerSavedRegisters { get; }
        public int VecCallerSavedRegisters { get; }
        public int IntCalleeSavedRegisters { get; }
        public int VecCalleeSavedRegisters { get; }
        public int RegistersCount { get; }

        /// <summary>
        /// Stores every argument in the property of the same name.
        /// </summary>
        public RegisterMasks(
            int intAvailableRegisters,
            int vecAvailableRegisters,
            int intCallerSavedRegisters,
            int vecCallerSavedRegisters,
            int intCalleeSavedRegisters,
            int vecCalleeSavedRegisters,
            int registersCount)
        {
            RegistersCount = registersCount;

            IntAvailableRegisters = intAvailableRegisters;
            IntCallerSavedRegisters = intCallerSavedRegisters;
            IntCalleeSavedRegisters = intCalleeSavedRegisters;

            VecAvailableRegisters = vecAvailableRegisters;
            VecCallerSavedRegisters = vecCallerSavedRegisters;
            VecCalleeSavedRegisters = vecCalleeSavedRegisters;
        }

        /// <summary>
        /// Selects the available-registers mask that matches <paramref name="type"/>.
        /// </summary>
        /// <param name="type">Register file to query.</param>
        /// <returns>The integer or the vector available-registers mask.</returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="type"/> is neither Integer nor Vector.
        /// </exception>
        public int GetAvailableRegisters(RegisterType type) => type switch
        {
            RegisterType.Integer => IntAvailableRegisters,
            RegisterType.Vector => VecAvailableRegisters,
            _ => throw new ArgumentException($"Invalid register type \"{type}\"."),
        };
    }
}
// File: src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
using ARMeilleure.IntermediateRepresentation;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Hands out consecutive byte offsets by bumping a running total; nothing is ever released.
    /// </summary>
    class StackAllocator
    {
        private int _offset;

        /// <summary>Sum of the sizes of all allocations made so far, in bytes.</summary>
        public int TotalSize => _offset;

        /// <summary>
        /// Reserves as many bytes as <paramref name="type"/> reports through GetSizeInBytes().
        /// </summary>
        /// <returns>Offset of the reserved area.</returns>
        public int Allocate(OperandType type) => Allocate(type.GetSizeInBytes());

        /// <summary>
        /// Reserves <paramref name="sizeInBytes"/> bytes. No alignment is applied.
        /// </summary>
        /// <returns>Offset of the reserved area, i.e. the total size before this call.</returns>
        public int Allocate(int sizeInBytes)
        {
            int allocated = _offset;

            _offset = allocated + sizeInBytes;

            return allocated;
        }
    }
}
// File: src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs
using System;

namespace ARMeilleure.CodeGen.RegisterAllocators
{
    /// <summary>
    /// Growable list of use positions kept in descending order, so the smallest position is the last
    /// element and the largest one is the first.
    /// </summary>
    unsafe struct UseList
    {
        private int* _items;
        private int _capacity;
        private int _count;

        /// <summary>Number of positions stored.</summary>
        public int Count => _count;

        /// <summary>Smallest stored position, or <see cref="LiveInterval.NotFound"/> when the list is empty.</summary>
        public int FirstUse => _count > 0 ? _items[_count - 1] : LiveInterval.NotFound;

        /// <summary>View over the stored positions, in descending order.</summary>
        public Span<int> Span => new(_items, _count);

        /// <summary>
        /// Inserts <paramref name="position"/>, keeping the list in descending order and growing the
        /// backing buffer when it is full.
        /// </summary>
        public void Add(int position)
        {
            if (_capacity < _count + 1)
            {
                // Capture the current contents before the buffer pointer is replaced.
                Span<int> previous = Span;

                _capacity = Math.Max(4, _capacity * 2);
                _items = Allocators.Default.Allocate<int>((uint)_capacity);

                previous.CopyTo(Span);
            }

            // Positions usually arrive in descending order, so with a descending list the new position
            // normally belongs at the back and few (if any) elements have to be shifted.
            int slot = _count;

            for (; slot > 0 && _items[slot - 1] < position; slot--)
            {
                _items[slot] = _items[slot - 1];
            }

            _items[slot] = position;
            _count++;
        }

        /// <summary>
        /// Gets the smallest stored position that is greater than or equal to <paramref name="position"/>.
        /// </summary>
        /// <returns>That position, or <see cref="LiveInterval.NotFound"/> if there is none.</returns>
        public int NextUse(int position)
        {
            int found = NextUseIndex(position);

            if (found == LiveInterval.NotFound)
            {
                return LiveInterval.NotFound;
            }

            return _items[found];
        }

        /// <summary>
        /// Gets the index of the smallest stored position that is greater than or equal to
        /// <paramref name="position"/>.
        /// </summary>
        /// <returns>That index, or <see cref="LiveInterval.NotFound"/> if there is none.</returns>
        public int NextUseIndex(int position)
        {
            // Nothing stored, or even the largest position (index 0) is below the requested one.
            if (_count == 0 || position > _items[0])
            {
                return LiveInterval.NotFound;
            }

            int index = _count - 1;

            // Walk from the smallest position towards the largest one.
            for (; index >= 0 && _items[index] < position; index--)
            {
            }

            return index;
        }

        /// <summary>
        /// Splits the list at <paramref name="position"/> without copying: the returned list takes the
        /// positions greater than or equal to it, and this list keeps the smaller ones.
        /// </summary>
        /// <remarks>
        /// Both lists end up with a capacity equal to their count, so the next
        /// <see cref="Add(int)"/> on either of them moves it to a freshly allocated buffer.
        /// </remarks>
        public UseList Split(int position)
        {
            // The list is descending, so the positions at or after the split point sit at the front.
            // When nothing is found the index is NotFound (-1) and nothing is taken.
            int taken = NextUseIndex(position) + 1;

            UseList front = new()
            {
                _items = _items,
                _count = taken,
                _capacity = taken,
            };

            _items += taken;
            _count -= taken;
            _capacity = _count;

            return front;
        }
    }
}
// File: src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
namespace ARMeilleure.CodeGen.Unwinding
{
    /// <summary>
    /// Pairs the list of recorded prolog entries with the size of the prolog.
    /// </summary>
    struct UnwindInfo
    {
        public const int Stride = 4; // Bytes.

        /// <summary>Entries passed to the constructor.</summary>
        public UnwindPushEntry[] PushEntries { get; }

        /// <summary>Prolog size passed to the constructor.</summary>
        public int PrologSize { get; }

        /// <summary>
        /// Stores both arguments as they are; the array is not copied.
        /// </summary>
        public UnwindInfo(UnwindPushEntry[] pushEntries, int prologSize)
            => (PushEntries, PrologSize) = (pushEntries, prologSize);
    }
}
// File: src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs
namespace ARMeilleure.CodeGen.Unwinding
{
    /// <summary>
    /// Kind of operation carried by an <see cref="UnwindPushEntry"/>.
    /// </summary>
    /// <remarks>
    /// The numeric values are spelled out explicitly and must stay as they are.
    /// NOTE(review): the meaning of each member is only implied by its name here; confirm against the
    /// code that produces and consumes the entries.
    /// </remarks>
    enum UnwindPseudoOp
    {
        PushReg = 0,
        SetFrame = 1,
        AllocStack = 2,
        SaveReg = 3,
        SaveXmm128 = 4,
    }
}
// File: src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
namespace ARMeilleure.CodeGen.Unwinding
{
    /// <summary>
    /// One recorded prolog operation: its kind, the prolog offset it was recorded at, and its optional
    /// register index and stack offset / allocation size.
    /// </summary>
    struct UnwindPushEntry
    {
        public const int Stride = 16; // Bytes.

        /// <summary>Kind of operation.</summary>
        public UnwindPseudoOp PseudoOp { get; }

        /// <summary>Prolog offset passed to the constructor.</summary>
        public int PrologOffset { get; }

        /// <summary>Register index, or -1 when the constructor default was used.</summary>
        public int RegIndex { get; }

        /// <summary>Stack offset or allocation size, or -1 when the constructor default was used.</summary>
        public int StackOffsetOrAllocSize { get; }

        /// <summary>
        /// Stores every argument in the property of the same name.
        /// </summary>
        public UnwindPushEntry(UnwindPseudoOp pseudoOp, int prologOffset, int regIndex = -1, int stackOffsetOrAllocSize = -1)
            => (PseudoOp, PrologOffset, RegIndex, StackOffsetOrAllocSize) = (pseudoOp, prologOffset, regIndex, stackOffsetOrAllocSize);
    }
}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/Assembler.cs b/src/ARMeilleure/CodeGen/X86/Assembler.cs new file mode 100644 index 00000000..67736a31 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/Assembler.cs @@ -0,0 +1,1559 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.IntermediateRepresentation; +using Ryujinx.Common.Memory; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Runtime.InteropServices; + +namespace ARMeilleure.CodeGen.X86 +{ + partial class Assembler + { + private const int ReservedBytesForJump = 1; + + private const int OpModRMBits = 24; + + private const byte RexPrefix = 0x40; + private const byte RexWPrefix = 0x48; + private const byte LockPrefix = 0xf0; + + private const int MaxRegNumber = 15; + + private struct Jump + { + public bool IsConditional { get; } + public X86Condition Condition { get; } + public Operand JumpLabel { get; } + public long? JumpTarget { get; set; } + public long JumpPosition { get; } + public long Offset { get; set; } + public int InstSize { get; set; } + + public Jump(Operand jumpLabel, long jumpPosition) + { + IsConditional = false; + Condition = 0; + JumpLabel = jumpLabel; + JumpTarget = null; + JumpPosition = jumpPosition; + + Offset = 0; + InstSize = 0; + } + + public Jump(X86Condition condition, Operand jumpLabel, long jumpPosition) + { + IsConditional = true; + Condition = condition; + JumpLabel = jumpLabel; + JumpTarget = null; + JumpPosition = jumpPosition; + + Offset = 0; + InstSize = 0; + } + } + + private struct Reloc + { + public int JumpIndex { get; set; } + public int Position { get; set; } + public Symbol Symbol { get; set; } + } + + private readonly List<Jump> _jumps; + private readonly List<Reloc> _relocs; + private readonly Dictionary<Operand, long> _labels; + private readonly Stream _stream; + + public bool HasRelocs => _relocs != null; + + public Assembler(Stream stream, bool relocatable) + { + _stream = 
stream; + _labels = new Dictionary<Operand, long>(); + _jumps = new List<Jump>(); + + _relocs = relocatable ? new List<Reloc>() : null; + } + + public void MarkLabel(Operand label) + { + _labels.Add(label, _stream.Position); + } + + public void Add(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Add); + } + + public void Addsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Addsd); + } + + public void Addss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Addss); + } + + public void And(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.And); + } + + public void Bsr(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Bsr); + } + + public void Bswap(Operand dest) + { + WriteInstruction(dest, default, dest.Type, X86Instruction.Bswap); + } + + public void Call(Operand dest) + { + WriteInstruction(dest, default, OperandType.None, X86Instruction.Call); + } + + public void Cdq() + { + WriteByte(0x99); + } + + public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Cmovcc]; + + WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM | (int)condition, rrm: true); + } + + public void Cmp(Operand src1, Operand src2, OperandType type) + { + WriteInstruction(src1, src2, type, X86Instruction.Cmp); + } + + public void Cqo() + { + WriteByte(0x48); + WriteByte(0x99); + } + + public void Cmpxchg(Operand memOp, Operand src) + { + Debug.Assert(memOp.Kind == OperandKind.Memory); + + WriteByte(LockPrefix); + + WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg); + } + + public void Cmpxchg16(Operand memOp, Operand src) + { + Debug.Assert(memOp.Kind == OperandKind.Memory); + + 
WriteByte(LockPrefix); + WriteByte(0x66); + + WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg); + } + + public void Cmpxchg16b(Operand memOp) + { + Debug.Assert(memOp.Kind == OperandKind.Memory); + + WriteByte(LockPrefix); + + WriteInstruction(memOp, default, OperandType.None, X86Instruction.Cmpxchg16b); + } + + public void Cmpxchg8(Operand memOp, Operand src) + { + Debug.Assert(memOp.Kind == OperandKind.Memory); + + WriteByte(LockPrefix); + + WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg8); + } + + public void Comisd(Operand src1, Operand src2) + { + WriteInstruction(src1, default, src2, X86Instruction.Comisd); + } + + public void Comiss(Operand src1, Operand src2) + { + WriteInstruction(src1, default, src2, X86Instruction.Comiss); + } + + public void Cvtsd2ss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss); + } + + public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type); + } + + public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type); + } + + public void Cvtss2sd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd); + } + + public void Div(Operand source) + { + WriteInstruction(default, source, source.Type, X86Instruction.Div); + } + + public void Divsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Divsd); + } + + public void Divss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Divss); + } + + public void Idiv(Operand source) + { + WriteInstruction(default, source, source.Type, X86Instruction.Idiv); + } + + public void Imul(Operand source) + { + WriteInstruction(default, source, source.Type, X86Instruction.Imul128); + } + + 
public void Imul(Operand dest, Operand source, OperandType type) + { + if (source.Kind != OperandKind.Register) + { + throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\"."); + } + + WriteInstruction(dest, source, type, X86Instruction.Imul); + } + + public void Imul(Operand dest, Operand src1, Operand src2, OperandType type) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Imul]; + + if (src2.Kind != OperandKind.Constant) + { + throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\"."); + } + + if (IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm8, rrm: true); + + WriteByte(src2.AsByte()); + } + else if (IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp) + { + WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm32, rrm: true); + + WriteInt32(src2.AsInt32()); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}."); + } + } + + public void Insertps(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Insertps); + + WriteByte(imm); + } + + public void Jcc(X86Condition condition, Operand dest) + { + if (dest.Kind == OperandKind.Label) + { + _jumps.Add(new Jump(condition, dest, _stream.Position)); + + // ReservedBytesForJump + WriteByte(0); + } + else + { + throw new ArgumentException("Destination operand must be of kind Label", nameof(dest)); + } + } + + public void Jcc(X86Condition condition, long offset) + { + if (ConstFitsOnS8(offset)) + { + WriteByte((byte)(0x70 | (int)condition)); + + WriteByte((byte)offset); + } + else if (ConstFitsOnS32(offset)) + { + WriteByte(0x0f); + WriteByte((byte)(0x80 | (int)condition)); + + WriteInt32((int)offset); + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public void Jmp(long offset) + { + if (ConstFitsOnS8(offset)) + { + WriteByte(0xeb); + 
+ WriteByte((byte)offset); + } + else if (ConstFitsOnS32(offset)) + { + WriteByte(0xe9); + + WriteInt32((int)offset); + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public void Jmp(Operand dest) + { + if (dest.Kind == OperandKind.Label) + { + _jumps.Add(new Jump(dest, _stream.Position)); + + // ReservedBytesForJump + WriteByte(0); + } + else + { + WriteInstruction(dest, default, OperandType.None, X86Instruction.Jmp); + } + } + + public void Ldmxcsr(Operand dest) + { + WriteInstruction(dest, default, OperandType.I32, X86Instruction.Ldmxcsr); + } + + public void Lea(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Lea); + } + + public void LockOr(Operand dest, Operand source, OperandType type) + { + WriteByte(LockPrefix); + WriteInstruction(dest, source, type, X86Instruction.Or); + } + + public void Mov(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Mov); + } + + public void Mov16(Operand dest, Operand source) + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16); + } + + public void Mov8(Operand dest, Operand source) + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8); + } + + public void Movd(Operand dest, Operand source) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd]; + + if (source.Type.IsInteger() || source.Kind == OperandKind.Memory) + { + WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRRM, rrm: true); + } + else + { + WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRMR); + } + } + + public void Movdqu(Operand dest, Operand source) + { + WriteInstruction(dest, default, source, X86Instruction.Movdqu); + } + + public void Movhlps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movhlps); + } + + public void Movlhps(Operand dest, 
Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movlhps); + } + + public void Movq(Operand dest, Operand source) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd]; + + InstructionFlags flags = info.Flags | InstructionFlags.RexW; + + if (source.Type.IsInteger() || source.Kind == OperandKind.Memory) + { + WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRRM, rrm: true); + } + else if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory) + { + WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRMR); + } + else + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq); + } + } + + public void Movsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movsd); + } + + public void Movss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movss); + } + + public void Movsx16(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx16); + } + + public void Movsx32(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx32); + } + + public void Movsx8(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx8); + } + + public void Movzx16(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movzx16); + } + + public void Movzx8(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movzx8); + } + + public void Mul(Operand source) + { + WriteInstruction(default, source, source.Type, X86Instruction.Mul128); + } + + public void Mulsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Mulsd); + } + + public void Mulss(Operand dest, Operand src1, Operand src2) 
{
            WriteInstruction(dest, src1, src2, X86Instruction.Mulss);
        }

        // Emits NEG on dest; dest's own type is used as the operand type.
        public void Neg(Operand dest)
        {
            WriteInstruction(dest, default, dest.Type, X86Instruction.Neg);
        }

        // Emits NOT on dest; dest's own type is used as the operand type.
        public void Not(Operand dest)
        {
            WriteInstruction(dest, default, dest.Type, X86Instruction.Not);
        }

        // Emits OR dest, source with the given operand type.
        public void Or(Operand dest, Operand source, OperandType type)
        {
            WriteInstruction(dest, source, type, X86Instruction.Or);
        }

        // Emits PCLMULQDQ dest, source and appends the 8-bit immediate.
        public void Pclmulqdq(Operand dest, Operand source, byte imm)
        {
            WriteInstruction(dest, default, source, X86Instruction.Pclmulqdq);

            WriteByte(imm);
        }

        // Emits PCMPEQW dest, src1, src2.
        public void Pcmpeqw(Operand dest, Operand src1, Operand src2)
        {
            WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw);
        }

        // Emits PEXTRB dest, source and appends the 8-bit immediate.
        public void Pextrb(Operand dest, Operand source, byte imm)
        {
            WriteInstruction(dest, default, source, X86Instruction.Pextrb);

            WriteByte(imm);
        }

        // Emits PEXTRD dest, source and appends the 8-bit immediate.
        public void Pextrd(Operand dest, Operand source, byte imm)
        {
            WriteInstruction(dest, default, source, X86Instruction.Pextrd);

            WriteByte(imm);
        }

        // Emits PEXTRQ dest, source and appends the 8-bit immediate.
        public void Pextrq(Operand dest, Operand source, byte imm)
        {
            WriteInstruction(dest, default, source, X86Instruction.Pextrq);

            WriteByte(imm);
        }

        // Emits PEXTRW dest, source and appends the 8-bit immediate.
        public void Pextrw(Operand dest, Operand source, byte imm)
        {
            WriteInstruction(dest, default, source, X86Instruction.Pextrw);

            WriteByte(imm);
        }

        // Emits PINSRB dest, src1, src2 and appends the 8-bit immediate.
        public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm)
        {
            WriteInstruction(dest, src1, src2, X86Instruction.Pinsrb);

            WriteByte(imm);
        }

        // Emits PINSRD dest, src1, src2 and appends the 8-bit immediate.
        public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm)
        {
            WriteInstruction(dest, src1, src2, X86Instruction.Pinsrd);

            WriteByte(imm);
        }

        // Emits PINSRQ dest, src1, src2 and appends the 8-bit immediate.
        public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm)
        {
            WriteInstruction(dest, src1, src2, X86Instruction.Pinsrq);

            WriteByte(imm);
        }

        // Emits PINSRW dest, src1, src2 and appends the 8-bit immediate.
        public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm)
        {
            WriteInstruction(dest, src1, src2, X86Instruction.Pinsrw);

            WriteByte(imm);
        }

        // Emits POP. Register destinations use the compact one-byte form (0x58 + low register bits);
        // any other operand kind goes through the instruction table.
        public void Pop(Operand dest)
        {
            if (dest.Kind == OperandKind.Register)
            {
                WriteCompactInst(dest, 0x58);
            }
            else
            {
                WriteInstruction(dest, default, dest.Type, X86Instruction.Pop);
            }
        }

        // Emits POPCNT dest, source with the given operand type.
        public void Popcnt(Operand dest, Operand source, OperandType type)
        {
            WriteInstruction(dest, source, type, X86Instruction.Popcnt);
        }

        // Emits PSHUFD dest, source and appends the 8-bit immediate.
        public void Pshufd(Operand dest, Operand source, byte imm)
        {
            WriteInstruction(dest, default, source, X86Instruction.Pshufd);

            WriteByte(imm);
        }

        // Emits PUSH. Register sources use the compact one-byte form (0x50 + low register bits);
        // any other operand kind goes through the instruction table.
        public void Push(Operand source)
        {
            if (source.Kind == OperandKind.Register)
            {
                WriteCompactInst(source, 0x50);
            }
            else
            {
                WriteInstruction(default, source, source.Type, X86Instruction.Push);
            }
        }

        // Emits the single byte 0xc3 (near RET).
        public void Return()
        {
            WriteByte(0xc3);
        }

        // Emits ROR dest, source; see WriteShiftInst for how the shift count operand is handled.
        public void Ror(Operand dest, Operand source, OperandType type)
        {
            WriteShiftInst(dest, source, type, X86Instruction.Ror);
        }

        // Emits SAR dest, source; see WriteShiftInst for how the shift count operand is handled.
        public void Sar(Operand dest, Operand source, OperandType type)
        {
            WriteShiftInst(dest, source, type, X86Instruction.Sar);
        }

        // Emits SHL dest, source; see WriteShiftInst for how the shift count operand is handled.
        public void Shl(Operand dest, Operand source, OperandType type)
        {
            WriteShiftInst(dest, source, type, X86Instruction.Shl);
        }

        // Emits SHR dest, source; see WriteShiftInst for how the shift count operand is handled.
        public void Shr(Operand dest, Operand source, OperandType type)
        {
            WriteShiftInst(dest, source, type, X86Instruction.Shr);
        }

        // Emits SETcc on dest. The condition value is OR'ed into the table's R/RM opcode.
        public void Setcc(Operand dest, X86Condition condition)
        {
            ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Setcc];

            WriteOpCode(dest, default, default, OperandType.None, info.Flags, info.OpRRM | (int)condition);
        }

        // Emits STMXCSR to dest, always with an I32 operand type.
        public void Stmxcsr(Operand dest)
        {
            WriteInstruction(dest, default, OperandType.I32, X86Instruction.Stmxcsr);
        }

        // Emits SUB dest, source with the given operand type.
        public void Sub(Operand dest, Operand source, OperandType type)
        {
            WriteInstruction(dest, source, type, X86Instruction.Sub);
        }

        // Emits SUBSD dest, src1, src2.
        public void Subsd(Operand dest, Operand src1, Operand src2)
        {
            WriteInstruction(dest, src1, src2, X86Instruction.Subsd);
        }

        // Emits SUBSS dest, src1, src2.
        public void Subss(Operand dest, Operand src1, Operand src2)
        {
            WriteInstruction(dest, src1, src2, X86Instruction.Subss);
        }

        // Emits TEST src1, src2 with the given operand type.
        public void Test(Operand src1, Operand src2, OperandType type)
        {
            WriteInstruction(src1, src2, type, X86Instruction.Test);
        }

        // Emits XOR dest, source with the given operand type.
        public void Xor(Operand dest, Operand source, OperandType type)
        {
            WriteInstruction(dest, source, type, X86Instruction.Xor);
        }

        // Emits XORPS dest, src1, src2.
        public void Xorps(Operand dest, Operand src1, Operand src2)
        {
            WriteInstruction(dest, src1, src2, X86Instruction.Xorps);
        }

        // Generic two-operand emitter: encodes inst with no src1 and the given operand type.
        public void WriteInstruction(
            X86Instruction inst,
            Operand dest,
            Operand source,
            OperandType type = OperandType.None)
        {
            WriteInstruction(dest, default, source, inst, type);
        }

        // Generic three-operand emitter. When src2 is a constant, src1 is passed in the
        // destination slot and dest in the src1 slot of the underlying encoder.
        // NOTE(review): presumably this targets immediate forms whose destination is carried
        // in the VEX vvvv field - confirm against the callers.
        public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2)
        {
            if (src2.Kind == OperandKind.Constant)
            {
                WriteInstruction(src1, dest, src2, inst);
            }
            else
            {
                WriteInstruction(dest, src1, src2, inst);
            }
        }

        // Generic three-operand emitter with an explicit operand type.
        public void WriteInstruction(
            X86Instruction inst,
            Operand dest,
            Operand src1,
            Operand src2,
            OperandType type)
        {
            WriteInstruction(dest, src1, src2, inst, type);
        }

        // Generic two-operand emitter followed by an 8-bit immediate.
        public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm)
        {
            WriteInstruction(dest, default, source, inst);

            WriteByte(imm);
        }

        // Generic four-operand emitter. The fourth operand is written as a trailing byte
        // holding src3.AsByte() shifted into the upper nibble.
        public void WriteInstruction(
            X86Instruction inst,
            Operand dest,
            Operand src1,
            Operand src2,
            Operand src3)
        {
            // 3+ operands can only be encoded with the VEX encoding scheme.
            Debug.Assert(HardwareCapabilities.SupportsVexEncoding);

            WriteInstruction(dest, src1, src2, inst);

            WriteByte((byte)(src3.AsByte() << 4));
        }

        // Generic three-operand emitter followed by an 8-bit immediate.
        public void WriteInstruction(
            X86Instruction inst,
            Operand dest,
            Operand src1,
            Operand src2,
            byte imm)
        {
            WriteInstruction(dest, src1, src2, inst);

            WriteByte(imm);
        }

        // Shared path for the shift/rotate emitters.
        // - A register count must be RCX (asserted in debug builds) and is then dropped, so the
        //   instruction is encoded without an explicit source.
        // - A constant count is masked to 5 bits when dest.Type is I32 and to 6 bits otherwise.
        //   Note that the mask is chosen from dest.Type, not from the 'type' parameter.
        private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst)
        {
            if (source.Kind == OperandKind.Register)
            {
                X86Register shiftReg = (X86Register)source.GetRegister().Index;

                Debug.Assert(shiftReg == X86Register.Rcx, $"Invalid shift register \"{shiftReg}\".");

                source = default;
            }
            else if (source.Kind == OperandKind.Constant)
            {
                source = Operand.Factory.Const((int)source.Value & (dest.Type == OperandType.I32 ? 0x1f : 0x3f));
            }

            WriteInstruction(dest, source, type, inst);
        }

        // Encodes a one- or two-operand instruction, picking the table opcode from the source kind.
        // Constant sources try, in order: the Mov8/Mov16 special cases, the imm8 form, the imm32
        // form (never for relocatable constants), and finally the 64-bit register-immediate form.
        // Throws ArgumentException when no encoding fits, and ArgumentNullException when source
        // is default and the table has neither an R/RM nor an RM/R opcode.
        private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst)
        {
            ref readonly InstructionInfo info = ref _instTable[(int)inst];

            if (source != default)
            {
                if (source.Kind == OperandKind.Constant)
                {
                    ulong imm = source.Value;

                    if (inst == X86Instruction.Mov8)
                    {
                        WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);

                        WriteByte((byte)imm);
                    }
                    else if (inst == X86Instruction.Mov16)
                    {
                        // Mov16 reuses the imm32 opcode slot but only writes a 16-bit immediate.
                        WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);

                        WriteInt16((short)imm);
                    }
                    else if (IsImm8(imm, type) && info.OpRMImm8 != BadOp)
                    {
                        WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);

                        WriteByte((byte)imm);
                    }
                    else if (!source.Relocatable && IsImm32(imm, type) && info.OpRMImm32 != BadOp)
                    {
                        WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);

                        WriteInt32((int)imm);
                    }
                    else if (dest != default && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp)
                    {
                        int rexPrefix = GetRexPrefix(dest, source, type, rrm: false);

                        if (rexPrefix != 0)
                        {
                            WriteByte((byte)rexPrefix);
                        }

                        WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111)));

                        // Record where the 64-bit immediate starts so it can be patched later.
                        if (HasRelocs && source.Relocatable)
                        {
                            _relocs.Add(new Reloc
                            {
                                JumpIndex = _jumps.Count - 1,
                                Position = (int)_stream.Position,
                                Symbol = source.Symbol
                            });
                        }

                        WriteUInt64(imm);
                    }
                    else
                    {
                        throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
                    }
                }
                else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp)
                {
                    WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
                }
                else if (info.OpRRM != BadOp)
                {
                    WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
                }
                else
                {
                    throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
                }
            }
            else if (info.OpRRM != BadOp)
            {
                WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
            }
            else if (info.OpRMR != BadOp)
            {
                WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
            }
            else
            {
                throw new ArgumentNullException(nameof(source));
            }
        }

        // Encodes an instruction with an additional first source operand (src1).
        // A constant src2 is only accepted when it fits in one unsigned byte and the table has an
        // imm8 opcode. Throws ArgumentException when no encoding fits, and ArgumentNullException
        // when src2 is default and the table has neither an R/RM nor an RM/R opcode.
        private void WriteInstruction(
            Operand dest,
            Operand src1,
            Operand src2,
            X86Instruction inst,
            OperandType type = OperandType.None)
        {
            ref readonly InstructionInfo info = ref _instTable[(int)inst];

            if (src2 != default)
            {
                if (src2.Kind == OperandKind.Constant)
                {
                    ulong imm = src2.Value;

                    if ((byte)imm == imm && info.OpRMImm8 != BadOp)
                    {
                        WriteOpCode(dest, src1, default, type, info.Flags, info.OpRMImm8);

                        WriteByte((byte)imm);
                    }
                    else
                    {
                        throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
                    }
                }
                else if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp)
                {
                    WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
                }
                else if (info.OpRRM != BadOp)
                {
                    WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
                }
                else
                {
                    throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
                }
            }
            else if (info.OpRRM != BadOp)
            {
                WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
            }
            else if (info.OpRMR != BadOp)
            {
                WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
            }
            else
            {
                throw new ArgumentNullException(nameof(src2));
            }
        }

        // Writes the prefixes (EVEX, VEX, or legacy + REX), the opcode bytes and, unless the
        // instruction is flagged RegOnly, the ModRM byte with optional SIB byte and displacement.
        // 'rrm' selects which of dest/src2 lands in the ModRM reg field versus the rm field.
        // Throws ArgumentException for operand kinds other than register or memory, or when both
        // dest and src2 are memory operands.
        private void WriteOpCode(
            Operand dest,
            Operand src1,
            Operand src2,
            OperandType type,
            InstructionFlags flags,
            int opCode,
            bool rrm = false)
        {
            int rexPrefix = GetRexPrefix(dest, src2, type, rrm);

            if ((flags & InstructionFlags.RexW) != 0)
            {
                rexPrefix |= RexWPrefix;
            }

            // The bits of the table opcode above OpModRMBits seed the ModRM reg field.
            int modRM = (opCode >> OpModRMBits) << 3;

            MemoryOperand memOp = default;
            bool hasMemOp = false;

            if (dest != default)
            {
                if (dest.Kind == OperandKind.Register)
                {
                    int regIndex = dest.GetRegister().Index;

                    modRM |= (regIndex & 0b111) << (rrm ? 3 : 0);

                    // NOTE(review): presumably forces a REX prefix so that 8-bit register
                    // indices 4-7 do not select the legacy high-byte registers - confirm.
                    if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4)
                    {
                        rexPrefix |= RexPrefix;
                    }
                }
                else if (dest.Kind == OperandKind.Memory)
                {
                    memOp = dest.GetMemory();
                    hasMemOp = true;
                }
                else
                {
                    throw new ArgumentException($"Invalid destination operand kind \"{dest.Kind}\".");
                }
            }

            if (src2 != default)
            {
                if (src2.Kind == OperandKind.Register)
                {
                    int regIndex = src2.GetRegister().Index;

                    modRM |= (regIndex & 0b111) << (rrm ? 0 : 3);

                    if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4)
                    {
                        rexPrefix |= RexPrefix;
                    }
                }
                else if (src2.Kind == OperandKind.Memory && !hasMemOp)
                {
                    memOp = src2.GetMemory();
                    hasMemOp = true;
                }
                else
                {
                    throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
                }
            }

            bool needsSibByte = false;
            bool needsDisplacement = false;

            int sib = 0;

            if (hasMemOp)
            {
                // Either source or destination is a memory operand.
                Register baseReg = memOp.BaseAddress.GetRegister();

                X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111);

                // A base whose low bits match RSP always needs a SIB byte, and one whose low bits
                // match RBP always needs a displacement, even when that displacement is zero.
                needsSibByte = memOp.Index != default || baseRegLow == X86Register.Rsp;
                needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp;

                if (needsDisplacement)
                {
                    if (ConstFitsOnS8(memOp.Displacement))
                    {
                        modRM |= 0x40;
                    }
                    else /* if (ConstFitsOnS32(memOp.Displacement)) */
                    {
                        modRM |= 0x80;
                    }
                }

                if (baseReg.Index >= 8)
                {
                    Debug.Assert((uint)baseReg.Index <= MaxRegNumber);

                    rexPrefix |= RexPrefix | (baseReg.Index >> 3);
                }

                if (needsSibByte)
                {
                    sib = (int)baseRegLow;

                    if (memOp.Index != default)
                    {
                        int indexReg = memOp.Index.GetRegister().Index;

                        Debug.Assert(indexReg != (int)X86Register.Rsp, "Using RSP as index register on the memory operand is not allowed.");

                        if (indexReg >= 8)
                        {
                            Debug.Assert((uint)indexReg <= MaxRegNumber);

                            rexPrefix |= RexPrefix | (indexReg >> 3) << 1;
                        }

                        sib |= (indexReg & 0b111) << 3;
                    }
                    else
                    {
                        // No index register: the index field is set to 0b100.
                        sib |= 0b100 << 3;
                    }

                    sib |= (int)memOp.Scale << 6;

                    modRM |= 0b100;
                }
                else
                {
                    modRM |= (int)baseRegLow;
                }
            }
            else
            {
                // Source and destination are registers.
                modRM |= 0xc0;
            }

            Debug.Assert(opCode != BadOp, "Invalid opcode value.");

            if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
            {
                WriteEvexInst(dest, src1, src2, type, flags, opCode);

                // The escape bytes are carried by the EVEX prefix; only the last opcode byte remains.
                opCode &= 0xff;
            }
            else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
            {
                // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits.

                int vexByte2 = (flags & InstructionFlags.PrefixMask) switch
                {
                    InstructionFlags.Prefix66 => 1,
                    InstructionFlags.PrefixF3 => 2,
                    InstructionFlags.PrefixF2 => 3,
                    _ => 0
                };

                // The src1 register index is stored inverted; all ones when there is no src1.
                if (src1 != default)
                {
                    vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3;
                }
                else
                {
                    vexByte2 |= 0b1111 << 3;
                }

                ushort opCodeHigh = (ushort)(opCode >> 8);

                if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf)
                {
                    // Two-byte form.
                    WriteByte(0xc5);

                    vexByte2 |= (~rexPrefix & 4) << 5;

                    WriteByte((byte)vexByte2);
                }
                else
                {
                    // Three-byte form.
                    WriteByte(0xc4);

                    int vexByte1 = (~rexPrefix & 7) << 5;

                    switch (opCodeHigh)
                    {
                        case 0xf: vexByte1 |= 1; break;
                        case 0xf38: vexByte1 |= 2; break;
                        case 0xf3a: vexByte1 |= 3; break;

                        default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break;
                    }

                    vexByte2 |= (rexPrefix & 8) << 4;

                    WriteByte((byte)vexByte1);
                    WriteByte((byte)vexByte2);
                }

                opCode &= 0xff;
            }
            else
            {
                if (flags.HasFlag(InstructionFlags.Prefix66))
                {
                    WriteByte(0x66);
                }

                if (flags.HasFlag(InstructionFlags.PrefixF2))
                {
                    WriteByte(0xf2);
                }

                if (flags.HasFlag(InstructionFlags.PrefixF3))
                {
                    WriteByte(0xf3);
                }

                if (rexPrefix != 0)
                {
                    WriteByte((byte)rexPrefix);
                }
            }

            // RegOnly instructions carry the register in the low bits of the opcode byte itself.
            if (dest != default && (flags & InstructionFlags.RegOnly) != 0)
            {
                opCode += dest.GetRegister().Index & 7;
            }

            if ((opCode & 0xff0000) != 0)
            {
                WriteByte((byte)(opCode >> 16));
            }

            if ((opCode & 0xff00) != 0)
            {
                WriteByte((byte)(opCode >> 8));
            }

            WriteByte((byte)opCode);

            if ((flags & InstructionFlags.RegOnly) == 0)
            {
                WriteByte((byte)modRM);

                if (needsSibByte)
                {
                    WriteByte((byte)sib);
                }

                if (needsDisplacement)
                {
                    if (ConstFitsOnS8(memOp.Displacement))
                    {
                        WriteByte((byte)memOp.Displacement);
                    }
                    else /* if (ConstFitsOnS32(memOp.Displacement)) */
                    {
                        WriteInt32(memOp.Displacement);
                    }
                }
            }
        }

        // Writes the four-byte EVEX prefix (0x62, P0, P1, P2) for a register-register instruction.
        // dest and src2 must be register operands; src1 may be default, in which case the
        // second-operand fields are written in their "unused" (all ones after inversion) state,
        // the same way the VEX path in WriteOpCode treats a missing src1.
        // The opcode byte and ModRM byte are written by the caller.
        private void WriteEvexInst(
            Operand dest,
            Operand src1,
            Operand src2,
            OperandType type,
            InstructionFlags flags,
            int opCode,
            bool broadcast = false,
            int registerWidth = 128,
            int maskRegisterIdx = 0,
            bool zeroElements = false)
        {
            int op1Idx = dest.GetRegister().Index;
            int op2Idx = src1 != default ? src1.GetRegister().Index : 0;
            int op3Idx = src2.GetRegister().Index;

            WriteByte(0x62);

            // P0
            // Extend operand 1 register
            bool r = (op1Idx & 8) == 0;
            // Extend operand 3 register
            bool x = (op3Idx & 16) == 0;
            // Extend operand 3 register
            bool b = (op3Idx & 8) == 0;
            // Extend operand 1 register
            bool rp = (op1Idx & 16) == 0;
            // Escape code index
            byte mm = 0b00;

            // The switched value is the escape part of the table opcode (everything above the
            // last opcode byte, truncated to 16 bits), so the two-byte 0F map shows up as 0xf,
            // exactly as in the VEX three-byte switch in WriteOpCode.
            switch ((ushort)(opCode >> 8))
            {
                case 0xf: mm = 0b01; break;
                case 0xf38: mm = 0b10; break;
                case 0xf3a: mm = 0b11; break;

                default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
            }

            WriteByte(
                (byte)(
                    (r ? 0x80 : 0) |
                    (x ? 0x40 : 0) |
                    (b ? 0x20 : 0) |
                    (rp ? 0x10 : 0) |
                    mm));

            // P1
            // Specify 64-bit lane mode
            bool w = Is64Bits(type);
            // Operand 2 register index
            byte vvvv = (byte)(~op2Idx & 0b1111);
            // Opcode prefix
            byte pp = (flags & InstructionFlags.PrefixMask) switch
            {
                InstructionFlags.Prefix66 => 0b01,
                InstructionFlags.PrefixF3 => 0b10,
                InstructionFlags.PrefixF2 => 0b11,
                _ => 0
            };
            WriteByte(
                (byte)(
                    (w ? 0x80 : 0) |
                    (vvvv << 3) |
                    0b100 |
                    pp));

            // P2
            // Mask register determines what elements to zero, rather than what elements to merge
            bool z = zeroElements;
            // Specifies register-width
            byte ll = 0b00;
            switch (registerWidth)
            {
                case 128: ll = 0b00; break;
                case 256: ll = 0b01; break;
                case 512: ll = 0b10; break;

                default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
            }
            // Embedded broadcast in the case of a memory operand
            bool bcast = broadcast;
            // Extend operand 2 register
            bool vp = (op2Idx & 16) == 0;
            // Mask register index
            Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
            byte aaa = (byte)(maskRegisterIdx & 0b111);

            WriteByte(
                (byte)(
                    (z ? 0x80 : 0) |
                    (ll << 5) |
                    (bcast ? 0x10 : 0) |
                    (vp ? 8 : 0) |
                    aaa));
        }

        // Writes a one-byte "opcode + register" instruction, preceded by the byte 0x41 when the
        // register index is 8 or above.
        private void WriteCompactInst(Operand operand, int opCode)
        {
            int regIndex = operand.GetRegister().Index;

            if (regIndex >= 8)
            {
                WriteByte(0x41);
            }

            WriteByte((byte)(opCode + (regIndex & 0b111)));
        }

        // Builds the REX prefix value for the given operands: RexWPrefix for 64-bit operand types,
        // plus RexPrefix and the matching extension bit for every register operand whose index
        // is 8 or above. Returns 0 when no prefix is needed. 'rrm' swaps which extension bit
        // (bit 0 or bit 2) belongs to dest and which to source.
        private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm)
        {
            int rexPrefix = 0;

            if (Is64Bits(type))
            {
                rexPrefix = RexWPrefix;
            }

            void SetRegisterHighBit(Register reg, int bit)
            {
                if (reg.Index >= 8)
                {
                    rexPrefix |= RexPrefix | (reg.Index >> 3) << bit;
                }
            }

            if (dest != default && dest.Kind == OperandKind.Register)
            {
                SetRegisterHighBit(dest.GetRegister(), rrm ? 2 : 0);
            }

            if (source != default && source.Kind == OperandKind.Register)
            {
                SetRegisterHighBit(source.GetRegister(), rrm ? 0 : 2);
            }

            return rexPrefix;
        }

        // Produces the final machine code and relocation table.
        // Jumps were emitted with ReservedBytesForJump placeholder bytes; this first iterates
        // until every jump's size and offset are stable, then copies the stream into a new one,
        // replacing each placeholder with the real jump and shifting relocation positions by the
        // accumulated size difference.
        public (byte[], RelocInfo) GetCode()
        {
            var jumps = CollectionsMarshal.AsSpan(_jumps);
            var relocs = CollectionsMarshal.AsSpan(_relocs);

            // Write jump relative offsets.
            bool modified;

            do
            {
                modified = false;

                for (int i = 0; i < jumps.Length; i++)
                {
                    ref Jump jump = ref jumps[i];

                    // If jump target not resolved yet, resolve it.
                    if (jump.JumpTarget == null)
                    {
                        jump.JumpTarget = _labels[jump.JumpLabel];
                    }

                    long jumpTarget = jump.JumpTarget.Value;
                    long offset = jumpTarget - jump.JumpPosition;

                    // Account for every other jump lying between this jump and its target, since
                    // each of them will occupy InstSize bytes instead of the reserved amount.
                    if (offset < 0)
                    {
                        for (int j = i - 1; j >= 0; j--)
                        {
                            ref Jump jump2 = ref jumps[j];

                            if (jump2.JumpPosition < jumpTarget)
                            {
                                break;
                            }

                            offset -= jump2.InstSize - ReservedBytesForJump;
                        }
                    }
                    else
                    {
                        for (int j = i + 1; j < jumps.Length; j++)
                        {
                            ref Jump jump2 = ref jumps[j];

                            if (jump2.JumpPosition >= jumpTarget)
                            {
                                break;
                            }

                            offset += jump2.InstSize - ReservedBytesForJump;
                        }

                        offset -= ReservedBytesForJump;
                    }

                    if (jump.IsConditional)
                    {
                        jump.InstSize = GetJccLength(offset);
                    }
                    else
                    {
                        jump.InstSize = GetJmpLength(offset);
                    }

                    // The jump is relative to the next instruction, not the current one.
                    // Since we didn't know the next instruction address when calculating
                    // the offset (as the size of the current jump instruction was not known),
                    // we now need to compensate the offset with the jump instruction size.
                    // It's also worth noting that:
                    // - This is only needed for backward jumps.
                    // - The GetJmpLength and GetJccLength also compensates the offset
                    // internally when computing the jump instruction size.
                    if (offset < 0)
                    {
                        offset -= jump.InstSize;
                    }

                    if (jump.Offset != offset)
                    {
                        jump.Offset = offset;

                        modified = true;
                    }
                }
            }
            while (modified);

            // Write the code, ignoring the dummy bytes after jumps, into a new stream.
            _stream.Seek(0, SeekOrigin.Begin);

            using var codeStream = MemoryStreamManager.Shared.GetStream();
            var assembler = new Assembler(codeStream, HasRelocs);

            bool hasRelocs = HasRelocs;
            int relocIndex = 0;
            int relocOffset = 0;
            var relocEntries = hasRelocs
                ? new RelocEntry[relocs.Length]
                : Array.Empty<RelocEntry>();

            for (int i = 0; i < jumps.Length; i++)
            {
                ref Jump jump = ref jumps[i];

                // If has relocations, calculate their new positions compensating for jumps.
                if (hasRelocs)
                {
                    relocOffset += jump.InstSize - ReservedBytesForJump;

                    for (; relocIndex < relocEntries.Length; relocIndex++)
                    {
                        ref Reloc reloc = ref relocs[relocIndex];

                        if (reloc.JumpIndex > i)
                        {
                            break;
                        }

                        relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
                    }
                }

                // Copy the code that precedes this jump, then skip over its placeholder bytes.
                Span<byte> buffer = new byte[jump.JumpPosition - _stream.Position];

                _stream.Read(buffer);
                _stream.Seek(ReservedBytesForJump, SeekOrigin.Current);

                codeStream.Write(buffer);

                if (jump.IsConditional)
                {
                    assembler.Jcc(jump.Condition, jump.Offset);
                }
                else
                {
                    assembler.Jmp(jump.Offset);
                }
            }

            // Write remaining relocations. This case happens when there are no jumps assembled.
            for (; relocIndex < relocEntries.Length; relocIndex++)
            {
                ref Reloc reloc = ref relocs[relocIndex];

                relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
            }

            _stream.CopyTo(codeStream);

            var code = codeStream.ToArray();
            var relocInfo = new RelocInfo(relocEntries);

            return (code, relocInfo);
        }

        // True for the two 64-bit operand types, I64 and FP64.
        private static bool Is64Bits(OperandType type)
        {
            return type == OperandType.I64 || type == OperandType.FP64;
        }

        // True when the immediate, read as a signed 32-bit value for I32 operands and as a signed
        // 64-bit value otherwise, fits in a signed byte.
        private static bool IsImm8(ulong immediate, OperandType type)
        {
            long value = type == OperandType.I32 ? (int)immediate : (long)immediate;

            return ConstFitsOnS8(value);
        }

        // True when the immediate, read as a signed 32-bit value for I32 operands and as a signed
        // 64-bit value otherwise, fits in a signed 32-bit integer.
        private static bool IsImm32(ulong immediate, OperandType type)
        {
            long value = type == OperandType.I32 ? (int)immediate : (long)immediate;

            return ConstFitsOnS32(value);
        }

        // Size in bytes of a conditional jump able to reach the offset: 2 when it fits in a
        // signed byte, 6 when it fits in 32 bits. Negative offsets are checked after subtracting
        // the instruction's own size. Throws ArgumentOutOfRangeException when neither form fits.
        private static int GetJccLength(long offset)
        {
            if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
            {
                return 2;
            }
            else if (ConstFitsOnS32(offset < 0 ? offset - 6 : offset))
            {
                return 6;
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(offset));
            }
        }

        // Size in bytes of an unconditional jump able to reach the offset: 2 when it fits in a
        // signed byte, 5 when it fits in 32 bits. Negative offsets are checked after subtracting
        // the instruction's own size. Throws ArgumentOutOfRangeException when neither form fits.
        private static int GetJmpLength(long offset)
        {
            if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
            {
                return 2;
            }
            else if (ConstFitsOnS32(offset < 0 ? offset - 5 : offset))
            {
                return 5;
            }
            else
            {
                throw new ArgumentOutOfRangeException(nameof(offset));
            }
        }

        // True when the value survives a round trip through sbyte.
        private static bool ConstFitsOnS8(long value)
        {
            return value == (sbyte)value;
        }

        // True when the value survives a round trip through int.
        private static bool ConstFitsOnS32(long value)
        {
            return value == (int)value;
        }

        // Writes a signed 16-bit value, low byte first.
        private void WriteInt16(short value)
        {
            WriteUInt16((ushort)value);
        }

        // Writes a signed 32-bit value, low byte first.
        private void WriteInt32(int value)
        {
            WriteUInt32((uint)value);
        }

        // Writes a single byte to the output stream.
        private void WriteByte(byte value)
        {
            _stream.WriteByte(value);
        }

        // Writes an unsigned 16-bit value, low byte first.
        private void WriteUInt16(ushort value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
        }

        // Writes an unsigned 32-bit value, low byte first.
        private void WriteUInt32(uint value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
            _stream.WriteByte((byte)(value >> 16));
            _stream.WriteByte((byte)(value >> 24));
        }

        // Writes an unsigned 64-bit value, low byte first.
        private void WriteUInt64(ulong value)
        {
            _stream.WriteByte((byte)(value >> 0));
            _stream.WriteByte((byte)(value >> 8));
            _stream.WriteByte((byte)(value >> 16));
            _stream.WriteByte((byte)(value >> 24));
            _stream.WriteByte((byte)(value >> 32));
            _stream.WriteByte((byte)(value >> 40));
            _stream.WriteByte((byte)(value >> 48));
            _stream.WriteByte((byte)(value >> 56));
        }
    }
}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs new file mode 100644 index 00000000..e6a2ff07 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs @@ -0,0 +1,295 @@ +using System; + +namespace ARMeilleure.CodeGen.X86 +{ + partial class Assembler + { + public static bool SupportsVexPrefix(X86Instruction inst) + { + return _instTable[(int)inst].Flags.HasFlag(InstructionFlags.Vex); + } + + private const int BadOp = 0; + + [Flags] + private enum InstructionFlags + { + None = 0, + RegOnly = 1 << 0, + Reg8Src = 1 << 1, + Reg8Dest = 1 << 2, + RexW = 1 << 3, + Vex = 1 << 4, + Evex = 1 << 5, + + PrefixBit = 16, + PrefixMask = 7 << PrefixBit, + Prefix66 = 1 << PrefixBit, + PrefixF3 = 2 << PrefixBit, + PrefixF2 = 4 << PrefixBit + } + + private readonly struct InstructionInfo + { + public int OpRMR { get; } + public int OpRMImm8 { get; } + public int OpRMImm32 { get; } + public int OpRImm64 { get; } + public int OpRRM { get; } + + public InstructionFlags Flags { get; } + + public InstructionInfo( + int opRMR, + int opRMImm8, + int opRMImm32, + int opRImm64, + int opRRM, + InstructionFlags flags) + { + OpRMR = opRMR; + OpRMImm8 = opRMImm8; + OpRMImm32 = opRMImm32; + OpRImm64 = opRImm64; + OpRRM = opRRM; + Flags = flags; + } + } + + private readonly static InstructionInfo[] _instTable; + + static Assembler() + { + _instTable = new InstructionInfo[(int)X86Instruction.Count]; + + // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags + Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None)); + Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex)); + Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, 
InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Aesdec, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Aesdeclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Aesenc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Aesenclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Aesimc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None)); + Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex)); + Add(X86Instruction.Andpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Andps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex)); + Add(X86Instruction.Blendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3815, InstructionFlags.Prefix66)); + Add(X86Instruction.Blendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3814, InstructionFlags.Prefix66)); + Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None)); + Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, 
InstructionFlags.RegOnly)); + Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None)); + Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None)); + Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex)); + Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cmpxchg, new InstructionInfo(0x00000fb1, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW)); + Add(X86Instruction.Cmpxchg8, new InstructionInfo(0x00000fb0, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Reg8Src)); + Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex)); + Add(X86Instruction.Crc32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2)); + Add(X86Instruction.Crc32_16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66)); + Add(X86Instruction.Crc32_8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src)); + Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, 
BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex)); + Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex)); + Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtss2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None)); + Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex)); + Add(X86Instruction.Divsd, new 
InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Gf2p8affineqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3ace, InstructionFlags.Prefix66)); + Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstructionFlags.None)); + Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None)); + Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None)); + Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Ldmxcsr, new InstructionInfo(0x02000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex)); + Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None)); + Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex)); + Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | 
InstructionFlags.PrefixF3)); + Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex)); + Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None)); + Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66)); + Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest)); + Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex)); + Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex)); + Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 
0x00000fbf, InstructionFlags.None)); + Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None)); + Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src)); + Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None)); + Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src)); + Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None)); + Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex)); + Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None)); + Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 
0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66)); + Add(X86Instruction.Pclmulqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a44, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | 
InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, 
InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, 
InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3)); + Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + 
Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66)); + 
Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None)); + Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex)); + Add(X86Instruction.Rcpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex)); + Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest)); + Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None)); + Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None)); + Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, 
BadOp, 0x000f38cb, InstructionFlags.None)); + Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex)); + Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex)); + Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Stmxcsr, new InstructionInfo(0x03000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex)); + Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None)); + Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex)); + Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Unpckhpd, new 
InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex)); + Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex)); + Add(X86Instruction.Vblendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | 
InstructionFlags.Prefix66)); + Add(X86Instruction.Vfnmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66)); + Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); + Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); + + static void Add(X86Instruction inst, in InstructionInfo info) + { + _instTable[(int)inst] = info; + } + } + } +} diff --git a/src/ARMeilleure/CodeGen/X86/CallConvName.cs b/src/ARMeilleure/CodeGen/X86/CallConvName.cs new file mode 100644 index 00000000..be367628 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/CallConvName.cs @@ -0,0 +1,8 @@ 
namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Identifies a native calling convention that the x86 code generator can target.
    /// </summary>
    enum CallConvName
    {
        /// <summary>The System V convention; this is the first member, so its value is 0.</summary>
        SystemV = 0,

        /// <summary>The Windows convention; this is the second member, so its value is 1.</summary>
        Windows = 1,
    }
}
using System;

namespace ARMeilleure.CodeGen.X86
{
    // Originally declared in src/ARMeilleure/CodeGen/X86/CallingConvention.cs.

    /// <summary>
    /// Answers register-usage questions (available, caller-saved, callee-saved, argument and
    /// return registers) for the calling convention reported by <see cref="GetCurrentCallConv"/>.
    /// Register sets are returned as bit masks where bit N stands for register index N.
    /// </summary>
    static class CallingConvention
    {
        // Sixteen low bits set: one bit for each register index 0-15.
        private const int RegistersMask = 0xffff;

        /// <summary>Builds the single-bit mask that corresponds to <paramref name="register"/>.</summary>
        private static int Bit(X86Register register)
        {
            return 1 << (int)register;
        }

        /// <summary>Returns the mask of integer registers usable for allocation: all of them except RSP.</summary>
        public static int GetIntAvailableRegisters()
        {
            int stackPointerBit = Bit(X86Register.Rsp);

            return RegistersMask & ~stackPointerBit;
        }

        /// <summary>Returns the mask of vector registers usable for allocation: all sixteen.</summary>
        public static int GetVecAvailableRegisters()
        {
            return RegistersMask;
        }

        /// <summary>
        /// Returns the mask of integer registers that a callee may overwrite under the current convention.
        /// </summary>
        public static int GetIntCallerSavedRegisters()
        {
            // These seven registers are caller saved under both conventions.
            int mask = Bit(X86Register.Rax) |
                       Bit(X86Register.Rcx) |
                       Bit(X86Register.Rdx) |
                       Bit(X86Register.R8) |
                       Bit(X86Register.R9) |
                       Bit(X86Register.R10) |
                       Bit(X86Register.R11);

            if (GetCurrentCallConv() != CallConvName.Windows)
            {
                // The non-Windows (System V) set additionally contains RSI and RDI.
                mask |= Bit(X86Register.Rsi) | Bit(X86Register.Rdi);
            }

            return mask;
        }

        /// <summary>
        /// Returns the mask of vector registers that a callee may overwrite under the current convention.
        /// </summary>
        public static int GetVecCallerSavedRegisters()
        {
            if (GetCurrentCallConv() != CallConvName.Windows)
            {
                // Every vector register is caller saved outside of Windows.
                return RegistersMask;
            }

            return Bit(X86Register.Xmm0) |
                   Bit(X86Register.Xmm1) |
                   Bit(X86Register.Xmm2) |
                   Bit(X86Register.Xmm3) |
                   Bit(X86Register.Xmm4) |
                   Bit(X86Register.Xmm5);
        }

        /// <summary>Returns the complement (within sixteen registers) of the integer caller-saved mask.</summary>
        public static int GetIntCalleeSavedRegisters()
        {
            return RegistersMask ^ GetIntCallerSavedRegisters();
        }

        /// <summary>Returns the complement (within sixteen registers) of the vector caller-saved mask.</summary>
        public static int GetVecCalleeSavedRegisters()
        {
            return RegistersMask ^ GetVecCallerSavedRegisters();
        }

        /// <summary>
        /// Returns 4 regardless of the current convention. This equals the number of register cases
        /// that <see cref="GetIntArgumentRegister"/> handles for the Windows convention.
        /// </summary>
        public static int GetArgumentsOnRegsCount()
        {
            return 4;
        }

        /// <summary>
        /// Returns 6 regardless of the current convention. This equals the number of register cases
        /// that <see cref="GetIntArgumentRegister"/> handles for the System V convention.
        /// </summary>
        public static int GetIntArgumentsOnRegsCount()
        {
            return 6;
        }

        /// <summary>
        /// Returns 8 regardless of the current convention. This equals the System V limit enforced by
        /// <see cref="GetVecArgumentRegister"/>.
        /// </summary>
        public static int GetVecArgumentsOnRegsCount()
        {
            return 8;
        }

        /// <summary>
        /// Maps an integer argument position to the register that carries it.
        /// </summary>
        /// <param name="index">Zero-based argument position (0-3 on Windows, 0-5 otherwise).</param>
        /// <returns>The register assigned to that position by the current convention.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> has no register under the current convention.
        /// </exception>
        public static X86Register GetIntArgumentRegister(int index)
        {
            bool isWindows = GetCurrentCallConv() == CallConvName.Windows;

            return (isWindows, index) switch
            {
                (true, 0) => X86Register.Rcx,
                (true, 1) => X86Register.Rdx,
                (true, 2) => X86Register.R8,
                (true, 3) => X86Register.R9,

                (false, 0) => X86Register.Rdi,
                (false, 1) => X86Register.Rsi,
                (false, 2) => X86Register.Rdx,
                (false, 3) => X86Register.Rcx,
                (false, 4) => X86Register.R8,
                (false, 5) => X86Register.R9,

                _ => throw new ArgumentOutOfRangeException(nameof(index)),
            };
        }

        /// <summary>
        /// Maps a vector argument position to the register that carries it.
        /// </summary>
        /// <param name="index">Zero-based argument position (0-3 on Windows, 0-7 otherwise).</param>
        /// <returns><c>Xmm0</c> offset by <paramref name="index"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative or not below the limit of the current convention.
        /// </exception>
        public static X86Register GetVecArgumentRegister(int index)
        {
            int limit = GetCurrentCallConv() == CallConvName.Windows ? 4 : 8;

            // The unsigned cast turns negative indices into huge values, so one test rejects both ends.
            if ((uint)index >= limit)
            {
                throw new ArgumentOutOfRangeException(nameof(index));
            }

            return X86Register.Xmm0 + index;
        }

        /// <summary>Returns the register that holds an integer return value (RAX).</summary>
        public static X86Register GetIntReturnRegister()
        {
            return X86Register.Rax;
        }

        /// <summary>Returns the register that holds the high half of an integer return value (RDX).</summary>
        public static X86Register GetIntReturnRegisterHigh()
        {
            return X86Register.Rdx;
        }

        /// <summary>Returns the register that holds a vector return value (XMM0).</summary>
        public static X86Register GetVecReturnRegister()
        {
            return X86Register.Xmm0;
        }

        /// <summary>
        /// Picks the convention from the host operating system: Windows when
        /// <see cref="OperatingSystem.IsWindows"/> is true, System V in every other case.
        /// </summary>
        public static CallConvName GetCurrentCallConv()
        {
            if (OperatingSystem.IsWindows())
            {
                return CallConvName.Windows;
            }

            return CallConvName.SystemV;
        }
    }
}
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using Ryujinx.Common.Memory;
using System.IO;
using System.Numerics;

namespace ARMeilleure.CodeGen.X86
{
    // Originally declared in src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs.

    /// <summary>
    /// Small helpers shared by the x86 code generation passes.
    /// </summary>
    static class CodeGenCommon
    {
        /// <summary>
        /// Tells whether the constant held by <paramref name="op"/> is too wide for a signed 32-bit value.
        /// </summary>
        /// <param name="op">
        /// Operand to inspect; it is read as a 32-bit integer when its type is I32 and as a 64-bit
        /// integer otherwise.
        /// </param>
        /// <returns>True when the value changes if it is narrowed to <see cref="int"/>.</returns>
        public static bool IsLongConst(Operand op)
        {
            long wide;

            if (op.Type == OperandType.I32)
            {
                wide = op.AsInt32();
            }
            else
            {
                wide = op.AsInt64();
            }

            // A value fits on a signed 32-bit immediate exactly when the narrowing cast round-trips.
            int narrowed = (int)wide;

            return wide != narrowed;
        }
    }

    // Originally declared in src/ARMeilleure/CodeGen/X86/CodeGenContext.cs.

    /// <summary>
    /// Per-function state used while emitting x86 code: the output stream and assembler, the
    /// register allocation result, one lazily created label per basic block, and the stack region
    /// sizes computed at construction time.
    /// </summary>
    class CodeGenContext
    {
        private readonly Stream _stream;
        private readonly Operand[] _blockLabels;

        /// <summary>Current length of the output stream, truncated to <see cref="int"/>.</summary>
        public int StreamOffset
        {
            get { return (int)_stream.Length; }
        }

        /// <summary>Register allocation result supplied to the constructor.</summary>
        public AllocationResult AllocResult { get; }

        /// <summary>Assembler that writes into this context's stream.</summary>
        public Assembler Assembler { get; }

        /// <summary>Block most recently passed to <see cref="EnterBlock"/>.</summary>
        public BasicBlock CurrBlock { get; private set; }

        /// <summary>Bytes reserved for outgoing call arguments, including alignment padding.</summary>
        public int CallArgsRegionSize { get; }

        /// <summary>Bytes needed to save the used callee-saved vector registers (16 per register).</summary>
        public int XmmSaveRegionSize { get; }

        /// <summary>
        /// Creates the context, its output stream and assembler, and computes the stack region sizes.
        /// </summary>
        /// <param name="allocResult">Result of register allocation for the function.</param>
        /// <param name="maxCallArgs">
        /// Largest argument count among the function's calls; a negative value means there are no calls.
        /// </param>
        /// <param name="blocksCount">Number of basic blocks; sizes the label table.</param>
        /// <param name="relocatable">Forwarded to the <see cref="Assembler"/> constructor.</param>
        public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
        {
            _stream = MemoryStreamManager.Shared.GetStream();
            _blockLabels = new Operand[blocksCount];

            AllocResult = allocResult;
            Assembler = new Assembler(_stream, relocatable);

            int callArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize);

            CallArgsRegionSize = callArgsRegionSize;
            XmmSaveRegionSize = xmmSaveRegionSize;
        }

        /// <summary>
        /// Computes how many bytes to reserve for outgoing call arguments so that the whole frame
        /// ends up as a multiple of 16 bytes.
        /// </summary>
        /// <param name="allocResult">Supplies the used register masks and the spill region size.</param>
        /// <param name="maxCallArgs">Largest call argument count, or a negative value when there are no calls.</param>
        /// <param name="xmmSaveRegionSize">Receives 16 bytes per used callee-saved vector register.</param>
        /// <returns>The argument region size, alignment padding included.</returns>
        private static int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize)
        {
            int savedIntMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
            int savedVecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;

            int savedIntCount = BitOperations.PopCount((uint)savedIntMask);
            int savedVecCount = BitOperations.PopCount((uint)savedVecMask);

            xmmSaveRegionSize = savedVecCount * 16;

            // We need to add 8 bytes to the total size, as the call to this function already pushed
            // 8 bytes (the return address).
            int calleeSaveRegionSize = savedIntCount * 8 + xmmSaveRegionSize + 8;

            // When the function has no calls, the count is negative and no shadow space is needed.
            // Otherwise the ABI mandates that the space for at least 4 arguments is reserved on the
            // stack (this is called shadow space).
            int argsCount = maxCallArgs < 0 ? 0 : (maxCallArgs < 4 ? 4 : maxCallArgs);

            // TODO: Align XMM save region to 16 bytes because unwinding on Windows requires it.
            int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize;

            // TODO: Instead of always multiplying by 16 (the largest possible size of a variable, since
            // a V128 has 16 bytes), we should calculate the exact size consumed by the arguments passed
            // to the called functions on the stack.
            int unalignedTotal = frameSize + argsCount * 16;

            // Round up so that the Stack Pointer will be aligned to 16 bytes.
            int alignedTotal = (unalignedTotal + 0xf) & ~0xf;

            return alignedTotal - frameSize;
        }

        /// <summary>
        /// Marks the label of <paramref name="block"/> at the current position, then makes it the current block.
        /// </summary>
        public void EnterBlock(BasicBlock block)
        {
            Operand label = GetLabel(block);

            Assembler.MarkLabel(label);

            CurrBlock = block;
        }

        /// <summary>Emits an unconditional jump to the label of <paramref name="target"/>.</summary>
        public void JumpTo(BasicBlock target)
        {
            Operand label = GetLabel(target);

            Assembler.Jmp(label);
        }

        /// <summary>
        /// Emits a jump to the label of <paramref name="target"/> taken when <paramref name="condition"/> holds.
        /// </summary>
        public void JumpTo(X86Condition condition, BasicBlock target)
        {
            Operand label = GetLabel(target);

            Assembler.Jcc(condition, label);
        }

        /// <summary>
        /// Returns the label stored for <paramref name="block"/>, creating and caching it on first use.
        /// </summary>
        private Operand GetLabel(BasicBlock block)
        {
            int slot = block.Index;

            if (_blockLabels[slot] == default)
            {
                _blockLabels[slot] = Operand.Factory.Label();
            }

            return _blockLabels[slot];
        }
    }
}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs new file mode 100644 index 00000000..e7179b51 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -0,0 +1,1865 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Common; +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Numerics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.X86 +{ + static class CodeGenerator + { + private const int RegistersCount = 16; + private const int PageSize = 0x1000; + private const int StackGuardSize = 0x2000; + + private static readonly Action<CodeGenContext, Operation>[] _instTable; + + static CodeGenerator() + { + _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))]; + + Add(Instruction.Add, GenerateAdd); + Add(Instruction.BitwiseAnd, GenerateBitwiseAnd); + Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr); + Add(Instruction.BitwiseNot, GenerateBitwiseNot); + Add(Instruction.BitwiseOr, GenerateBitwiseOr); + Add(Instruction.BranchIf, GenerateBranchIf); + Add(Instruction.ByteSwap, GenerateByteSwap); + Add(Instruction.Call, GenerateCall); + Add(Instruction.Clobber, GenerateClobber); + Add(Instruction.Compare, GenerateCompare); + Add(Instruction.CompareAndSwap, GenerateCompareAndSwap); + Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16); + Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8); + Add(Instruction.ConditionalSelect, GenerateConditionalSelect); + Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32); + Add(Instruction.ConvertToFP, GenerateConvertToFP); + 
Add(Instruction.Copy, GenerateCopy); + Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros); + Add(Instruction.Divide, GenerateDivide); + Add(Instruction.DivideUI, GenerateDivideUI); + Add(Instruction.Fill, GenerateFill); + Add(Instruction.Load, GenerateLoad); + Add(Instruction.Load16, GenerateLoad16); + Add(Instruction.Load8, GenerateLoad8); + Add(Instruction.MemoryBarrier, GenerateMemoryBarrier); + Add(Instruction.Multiply, GenerateMultiply); + Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI); + Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI); + Add(Instruction.Negate, GenerateNegate); + Add(Instruction.Return, GenerateReturn); + Add(Instruction.RotateRight, GenerateRotateRight); + Add(Instruction.ShiftLeft, GenerateShiftLeft); + Add(Instruction.ShiftRightSI, GenerateShiftRightSI); + Add(Instruction.ShiftRightUI, GenerateShiftRightUI); + Add(Instruction.SignExtend16, GenerateSignExtend16); + Add(Instruction.SignExtend32, GenerateSignExtend32); + Add(Instruction.SignExtend8, GenerateSignExtend8); + Add(Instruction.Spill, GenerateSpill); + Add(Instruction.SpillArg, GenerateSpillArg); + Add(Instruction.StackAlloc, GenerateStackAlloc); + Add(Instruction.Store, GenerateStore); + Add(Instruction.Store16, GenerateStore16); + Add(Instruction.Store8, GenerateStore8); + Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.Tailcall, GenerateTailcall); + Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar); + Add(Instruction.VectorExtract, GenerateVectorExtract); + Add(Instruction.VectorExtract16, GenerateVectorExtract16); + Add(Instruction.VectorExtract8, GenerateVectorExtract8); + Add(Instruction.VectorInsert, GenerateVectorInsert); + Add(Instruction.VectorInsert16, GenerateVectorInsert16); + Add(Instruction.VectorInsert8, GenerateVectorInsert8); + Add(Instruction.VectorOne, GenerateVectorOne); + Add(Instruction.VectorZero, GenerateVectorZero); + Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64); + 
Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96); + Add(Instruction.ZeroExtend16, GenerateZeroExtend16); + Add(Instruction.ZeroExtend32, GenerateZeroExtend32); + Add(Instruction.ZeroExtend8, GenerateZeroExtend8); + + static void Add(Instruction inst, Action<CodeGenContext, Operation> func) + { + _instTable[(int)inst] = func; + } + } + + public static CompiledFunction Generate(CompilerContext cctx) + { + ControlFlowGraph cfg = cctx.Cfg; + + Logger.StartPass(PassName.Optimization); + + if (cctx.Options.HasFlag(CompilerOptions.Optimize)) + { + if (cctx.Options.HasFlag(CompilerOptions.SsaForm)) + { + Optimizer.RunPass(cfg); + } + + BlockPlacement.RunPass(cfg); + } + + X86Optimizer.RunPass(cfg); + + Logger.EndPass(PassName.Optimization, cfg); + + Logger.StartPass(PassName.PreAllocation); + + StackAllocator stackAlloc = new(); + + PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs); + + Logger.EndPass(PassName.PreAllocation, cfg); + + Logger.StartPass(PassName.RegisterAllocation); + + if (cctx.Options.HasFlag(CompilerOptions.SsaForm)) + { + Ssa.Deconstruct(cfg); + } + + IRegisterAllocator regAlloc; + + if (cctx.Options.HasFlag(CompilerOptions.Lsra)) + { + regAlloc = new LinearScanAllocator(); + } + else + { + regAlloc = new HybridAllocator(); + } + + RegisterMasks regMasks = new( + CallingConvention.GetIntAvailableRegisters(), + CallingConvention.GetVecAvailableRegisters(), + CallingConvention.GetIntCallerSavedRegisters(), + CallingConvention.GetVecCallerSavedRegisters(), + CallingConvention.GetIntCalleeSavedRegisters(), + CallingConvention.GetVecCalleeSavedRegisters(), + RegistersCount); + + AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks); + + Logger.EndPass(PassName.RegisterAllocation, cfg); + + Logger.StartPass(PassName.CodeGeneration); + + bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0; + + CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable); + + UnwindInfo 
unwindInfo = WritePrologue(context); + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + context.EnterBlock(block); + + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + GenerateOperation(context, node); + } + + if (block.SuccessorsCount == 0) + { + // The only blocks which can have 0 successors are exit blocks. + Operation last = block.Operations.Last; + + Debug.Assert(last.Instruction == Instruction.Tailcall || + last.Instruction == Instruction.Return); + } + else + { + BasicBlock succ = block.GetSuccessor(0); + + if (succ != block.ListNext) + { + context.JumpTo(succ); + } + } + } + + (byte[] code, RelocInfo relocInfo) = context.Assembler.GetCode(); + + Logger.EndPass(PassName.CodeGeneration); + + return new CompiledFunction(code, unwindInfo, relocInfo); + } + + private static void GenerateOperation(CodeGenContext context, Operation operation) + { + if (operation.Instruction == Instruction.Extended) + { + IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic); + + switch (info.Type) + { + case IntrinsicType.Comis_: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + switch (operation.Intrinsic) + { + case Intrinsic.X86Comisdeq: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Equal); + break; + + case Intrinsic.X86Comisdge: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.AboveOrEqual); + break; + + case Intrinsic.X86Comisdlt: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Below); + break; + + case Intrinsic.X86Comisseq: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Equal); + break; + + case Intrinsic.X86Comissge: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.AboveOrEqual); + break; + + case Intrinsic.X86Comisslt: 
+ context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Below); + break; + } + + context.Assembler.Movzx8(dest, dest, OperandType.I32); + + break; + } + + case IntrinsicType.Mxcsr: + { + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + Debug.Assert(offset.Type == OperandType.I32); + + int offs = offset.AsInt32() + context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, offs); + + Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding); + + if (operation.Intrinsic == Intrinsic.X86Ldmxcsr) + { + Operand bits = operation.GetSource(1); + Debug.Assert(bits.Type == OperandType.I32); + + context.Assembler.Mov(memOp, bits, OperandType.I32); + context.Assembler.Ldmxcsr(memOp); + } + else if (operation.Intrinsic == Intrinsic.X86Stmxcsr) + { + Operand dest = operation.Destination; + Debug.Assert(dest.Type == OperandType.I32); + + context.Assembler.Stmxcsr(memOp); + context.Assembler.Mov(dest, memOp, OperandType.I32); + } + + break; + } + + case IntrinsicType.PopCount: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Popcnt(dest, source, dest.Type); + + break; + } + + case IntrinsicType.Unary: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, source); + + break; + } + + case IntrinsicType.UnaryToGpr: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger()); + + if (operation.Intrinsic == Intrinsic.X86Cvtsi2si) + { + if (dest.Type == OperandType.I32) + { + context.Assembler.Movd(dest, 
source); // int _mm_cvtsi128_si32(__m128i a) + } + else /* if (dest.Type == OperandType.I64) */ + { + context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64(__m128i a) + } + } + else + { + context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type); + } + + break; + } + + case IntrinsicType.Binary: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger()); + Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); + + break; + } + + case IntrinsicType.BinaryGpr: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src2.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src2.Type); + + break; + } + + case IntrinsicType.Crc32: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameReg(dest, src1); + + Debug.Assert(dest.Type.IsInteger() && src1.Type.IsInteger() && src2.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, src2, dest.Type); + + break; + } + + case IntrinsicType.BinaryImm: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, 
src1, src2.AsByte()); + + break; + } + + case IntrinsicType.Ternary: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src1, src2, src3); + + Debug.Assert(!dest.Type.IsInteger()); + + if (info.Inst == X86Instruction.Blendvpd && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vblendvpd, dest, src1, src2, src3); + } + else if (info.Inst == X86Instruction.Blendvps && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vblendvps, dest, src1, src2, src3); + } + else if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3); + } + else + { + EnsureSameReg(dest, src1); + + Debug.Assert(src3.GetRegister().Index == 0); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); + } + + break; + } + + case IntrinsicType.TernaryImm: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src1, src2); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte()); + + break; + } + + case IntrinsicType.Fma: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + Debug.Assert(HardwareCapabilities.SupportsVexEncoding); + + Debug.Assert(dest.Kind == OperandKind.Register && src1.Kind == OperandKind.Register && src2.Kind == OperandKind.Register); + Debug.Assert(src3.Kind == OperandKind.Register 
|| src3.Kind == OperandKind.Memory); + + EnsureSameType(dest, src1, src2, src3); + Debug.Assert(dest.Type == OperandType.V128); + + Debug.Assert(dest.Value == src1.Value); + + context.Assembler.WriteInstruction(info.Inst, dest, src2, src3); + + break; + } + } + } + else + { + Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction]; + + if (func != null) + { + func(context, operation); + } + else + { + throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\"."); + } + } + } + + private static void GenerateAdd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + if (dest.Type.IsInteger()) + { + // If Destination and Source 1 Operands are the same, perform a standard add as there are no benefits to using LEA. + if (dest.Kind == src1.Kind && dest.Value == src1.Value) + { + ValidateBinOp(dest, src1, src2); + + context.Assembler.Add(dest, src2, dest.Type); + } + else + { + EnsureSameType(dest, src1, src2); + + int offset; + Operand index; + + if (src2.Kind == OperandKind.Constant) + { + offset = src2.AsInt32(); + index = default; + } + else + { + offset = 0; + index = src2; + } + + Operand memOp = MemoryOp(dest.Type, src1, index, Multiplier.x1, offset); + + context.Assembler.Lea(dest, memOp, dest.Type); + } + } + else + { + ValidateBinOp(dest, src1, src2); + + if (dest.Type == OperandType.FP32) + { + context.Assembler.Addss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Addsd(dest, src1, src2); + } + } + } + + private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + // Note: GenerateCompareCommon makes the assumption that 
BitwiseAnd will emit only a single `and` + // instruction. + context.Assembler.And(dest, src2, dest.Type); + } + + private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Xor(dest, src2, dest.Type); + } + else + { + context.Assembler.Xorps(dest, src1, src2); + } + } + + private static void GenerateBitwiseNot(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Not(dest); + } + + private static void GenerateBitwiseOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Or(dest, src2, dest.Type); + } + + private static void GenerateBranchIf(CodeGenContext context, Operation operation) + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var cond = ((Comparison)comp.AsInt32()).ToX86Condition(); + + GenerateCompareCommon(context, operation); + + context.JumpTo(cond, context.CurrBlock.GetSuccessor(1)); + } + + private static void GenerateByteSwap(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Bswap(dest); + } + + private static void GenerateCall(CodeGenContext context, Operation operation) + { + context.Assembler.Call(operation.GetSource(0)); + } + + private static void GenerateClobber(CodeGenContext context, 
Operation operation) + { + // This is only used to indicate that a register is clobbered to the + // register allocator, we don't need to produce any code. + } + + private static void GenerateCompare(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand comp = operation.GetSource(2); + + Debug.Assert(dest.Type == OperandType.I32); + Debug.Assert(comp.Kind == OperandKind.Constant); + + var cond = ((Comparison)comp.AsInt32()).ToX86Condition(); + + GenerateCompareCommon(context, operation); + + context.Assembler.Setcc(dest, cond); + context.Assembler.Movzx8(dest, dest, OperandType.I32); + } + + private static void GenerateCompareCommon(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(src1, src2); + + Debug.Assert(src1.Type.IsInteger()); + + if (src2.Kind == OperandKind.Constant && src2.Value == 0) + { + if (MatchOperation(operation.ListPrevious, Instruction.BitwiseAnd, src1.Type, src1.GetRegister())) + { + // Since the `test` and `and` instruction set the status flags in the same way, we can omit the + // `test r,r` instruction when it is immediately preceded by an `and r,*` instruction. + // + // For example: + // + // and eax, 0x3 + // test eax, eax + // jz .L0 + // + // => + // + // and eax, 0x3 + // jz .L0 + } + else + { + context.Assembler.Test(src1, src1, src1.Type); + } + } + else + { + context.Assembler.Cmp(src1, src2, src1.Type); + } + } + + private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + + if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3. 
+ { + Operand memOp = MemoryOp(OperandType.I64, src1); + + context.Assembler.Cmpxchg16b(memOp); + } + else + { + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(src2, src3); + + Operand memOp = MemoryOp(src3.Type, src1); + + context.Assembler.Cmpxchg(memOp, src3); + } + } + + private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(src2, src3); + + Operand memOp = MemoryOp(src3.Type, src1); + + context.Assembler.Cmpxchg16(memOp, src3); + } + + private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(src2, src3); + + Operand memOp = MemoryOp(src3.Type, src1); + + context.Assembler.Cmpxchg8(memOp, src3); + } + + private static void GenerateConditionalSelect(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameReg (dest, src3); + EnsureSameType(dest, src2, src3); + + Debug.Assert(dest.Type.IsInteger()); + Debug.Assert(src1.Type == OperandType.I32); + + context.Assembler.Test (src1, src1, src1.Type); + context.Assembler.Cmovcc(dest, src2, dest.Type, X86Condition.NotEqual); + } + + private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64); + + context.Assembler.Mov(dest, source, OperandType.I32); + } + + private static void GenerateConvertToFP(CodeGenContext context, Operation operation) + { + Operand 
dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + + if (dest.Type == OperandType.FP32) + { + Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP64); + + if (source.Type.IsInteger()) + { + context.Assembler.Xorps (dest, dest, dest); + context.Assembler.Cvtsi2ss(dest, dest, source, source.Type); + } + else /* if (source.Type == OperandType.FP64) */ + { + context.Assembler.Cvtsd2ss(dest, dest, source); + + GenerateZeroUpper96(context, dest, dest); + } + } + else /* if (dest.Type == OperandType.FP64) */ + { + Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP32); + + if (source.Type.IsInteger()) + { + context.Assembler.Xorps (dest, dest, dest); + context.Assembler.Cvtsi2sd(dest, dest, source, source.Type); + } + else /* if (source.Type == OperandType.FP32) */ + { + context.Assembler.Cvtss2sd(dest, dest, source); + + GenerateZeroUpper64(context, dest, dest); + } + } + } + + private static void GenerateCopy(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant); + + // Moves to the same register are useless. + if (dest.Kind == source.Kind && dest.Value == source.Value) + { + return; + } + + if (dest.Kind == OperandKind.Register && + source.Kind == OperandKind.Constant && source.Value == 0) + { + // Assemble "mov reg, 0" as "xor reg, reg" as the later is more efficient. 
+ context.Assembler.Xor(dest, dest, OperandType.I32); + } + else if (dest.Type.IsInteger()) + { + context.Assembler.Mov(dest, source, dest.Type); + } + else + { + context.Assembler.Movdqu(dest, source); + } + } + + private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Bsr(dest, source, dest.Type); + + int operandSize = dest.Type == OperandType.I32 ? 32 : 64; + int operandMask = operandSize - 1; + + // When the input operand is 0, the result is undefined, however the + // ZF flag is set. We are supposed to return the operand size on that + // case. So, add an additional jump to handle that case, by moving the + // operand size constant to the destination register. + Operand neLabel = Label(); + + context.Assembler.Jcc(X86Condition.NotEqual, neLabel); + + context.Assembler.Mov(dest, Const(operandSize | operandMask), OperandType.I32); + + context.Assembler.MarkLabel(neLabel); + + // BSR returns the zero based index of the last bit set on the operand, + // starting from the least significant bit. However we are supposed to + // return the number of 0 bits on the high end. So, we invert the result + // of the BSR using XOR to get the correct value. 
+ context.Assembler.Xor(dest, Const(operandMask), OperandType.I32); + } + + private static void GenerateDivide(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + if (!dest.Type.IsInteger()) + { + ValidateBinOp(dest, dividend, divisor); + } + + if (dest.Type.IsInteger()) + { + divisor = operation.GetSource(2); + + EnsureSameType(dest, divisor); + + if (divisor.Type == OperandType.I32) + { + context.Assembler.Cdq(); + } + else + { + context.Assembler.Cqo(); + } + + context.Assembler.Idiv(divisor); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Divss(dest, dividend, divisor); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Divsd(dest, dividend, divisor); + } + } + + private static void GenerateDivideUI(CodeGenContext context, Operation operation) + { + Operand divisor = operation.GetSource(2); + + Operand rdx = Register(X86Register.Rdx); + + Debug.Assert(divisor.Type.IsInteger()); + + context.Assembler.Xor(rdx, rdx, OperandType.I32); + context.Assembler.Div(divisor); + } + + private static void GenerateFill(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + + Operand memOp = MemoryOp(dest.Type, rsp, default, Multiplier.x1, offs); + + GenerateLoad(context, memOp, dest); + } + + private static void GenerateLoad(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + GenerateLoad(context, address, value); + } + + private static void GenerateLoad16(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand 
address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Movzx16(value, address, value.Type); + } + + private static void GenerateLoad8(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Movzx8(value, address, value.Type); + } + + private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation) + { + context.Assembler.LockOr(MemoryOp(OperandType.I64, Register(X86Register.Rsp)), Const(0), OperandType.I32); + } + + private static void GenerateMultiply(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + if (src2.Kind != OperandKind.Constant) + { + EnsureSameReg(dest, src1); + } + + EnsureSameType(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + if (src2.Kind == OperandKind.Constant) + { + context.Assembler.Imul(dest, src1, src2, dest.Type); + } + else + { + context.Assembler.Imul(dest, src2, dest.Type); + } + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Mulss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Mulsd(dest, src1, src2); + } + } + + private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(1); + + Debug.Assert(source.Type == OperandType.I64); + + context.Assembler.Imul(source); + } + + private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(1); + + Debug.Assert(source.Type == OperandType.I64); + + context.Assembler.Mul(source); + } + + private static void GenerateNegate(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand 
source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Neg(dest); + } + + private static void GenerateReturn(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Return(); + } + + private static void GenerateRotateRight(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Ror(dest, src2, dest.Type); + } + + private static void GenerateShiftLeft(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Shl(dest, src2, dest.Type); + } + + private static void GenerateShiftRightSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Sar(dest, src2, dest.Type); + } + + private static void GenerateShiftRightUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Shr(dest, src2, dest.Type); + } + + private static void GenerateSignExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx16(dest, source, dest.Type); + } + + private static void GenerateSignExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); 
+ + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx32(dest, source, dest.Type); + } + + private static void GenerateSignExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx8(dest, source, dest.Type); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, context.CallArgsRegionSize); + } + + private static void GenerateSpillArg(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, 0); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset) + { + Operand offset = operation.GetSource(0); + Operand source = operation.GetSource(1); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + baseOffset; + + Operand rsp = Register(X86Register.Rsp); + + Operand memOp = MemoryOp(source.Type, rsp, default, Multiplier.x1, offs); + + GenerateStore(context, memOp, source); + } + + private static void GenerateStackAlloc(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + + Operand memOp = MemoryOp(OperandType.I64, rsp, default, Multiplier.x1, offs); + + context.Assembler.Lea(dest, memOp, OperandType.I64); + } + + private static void GenerateStore(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + GenerateStore(context, address, value); + } + + private static void GenerateStore16(CodeGenContext context, Operation operation) + { + 
Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Mov16(address, value); + } + + private static void GenerateStore8(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Mov8(address, value); + } + + private static void GenerateSubtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sub(dest, src2, dest.Type); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Subss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Subsd(dest, src1, src2); + } + } + + private static void GenerateTailcall(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Jmp(operation.GetSource(0)); + } + + private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger()); + + if (source.Type == OperandType.I32) + { + context.Assembler.Movd(dest, source); // (__m128i _mm_cvtsi32_si128(int a)) + } + else /* if (source.Type == OperandType.I64) */ + { + context.Assembler.Movq(dest, source); // (__m128i _mm_cvtsi64_si128(__int64 a)) + } + } + + private static void GenerateVectorExtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + 
Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes()); + + if (dest.Type == OperandType.I32) + { + if (index == 0) + { + context.Assembler.Movd(dest, src1); + } + else if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrd(dest, src1, index); + } + else + { + int mask0 = 0b11_10_01_00; + int mask1 = 0b11_10_01_00; + + mask0 = BitUtils.RotateRight(mask0, index * 2, 8); + mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8); + + context.Assembler.Pshufd(src1, src1, (byte)mask0); + context.Assembler.Movd (dest, src1); + context.Assembler.Pshufd(src1, src1, (byte)mask1); + } + } + else if (dest.Type == OperandType.I64) + { + if (index == 0) + { + context.Assembler.Movq(dest, src1); + } + else if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrq(dest, src1, index); + } + else + { + const byte mask = 0b01_00_11_10; + + context.Assembler.Pshufd(src1, src1, mask); + context.Assembler.Movq (dest, src1); + context.Assembler.Pshufd(src1, src1, mask); + } + } + else + { + // Floating-point types. 
+ if ((index >= 2 && dest.Type == OperandType.FP32) || + (index == 1 && dest.Type == OperandType.FP64)) + { + context.Assembler.Movhlps(dest, dest, src1); + context.Assembler.Movq (dest, dest); + } + else + { + context.Assembler.Movq(dest, src1); + } + + if (dest.Type == OperandType.FP32) + { + context.Assembler.Pshufd(dest, dest, (byte)(0xfc | (index & 1))); + } + } + } + + private static void GenerateVectorExtract16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 8); + + context.Assembler.Pextrw(dest, src1, index); + } + + private static void GenerateVectorExtract8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 16); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrb(dest, src1, index); + } + else + { + context.Assembler.Pextrw(dest, src1, (byte)(index >> 1)); + + if ((index & 1) != 0) + { + context.Assembler.Shr(dest, Const(8), OperandType.I32); + } + else + { + context.Assembler.Movzx8(dest, dest, OperandType.I32); + } + } + } + + private static void GenerateVectorInsert(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(src1.Type == OperandType.V128); + 
Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + void InsertIntSse2(int words) + { + if (dest.GetRegister() != src1.GetRegister()) + { + context.Assembler.Movdqu(dest, src1); + } + + for (int word = 0; word < words; word++) + { + // Insert lower 16-bits. + context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word)); + + // Move next word down. + context.Assembler.Ror(src2, Const(16), src2.Type); + } + } + + if (src2.Type == OperandType.I32) + { + Debug.Assert(index < 4); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pinsrd(dest, src1, src2, index); + } + else + { + InsertIntSse2(2); + } + } + else if (src2.Type == OperandType.I64) + { + Debug.Assert(index < 2); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pinsrq(dest, src1, src2, index); + } + else + { + InsertIntSse2(4); + } + } + else if (src2.Type == OperandType.FP32) + { + Debug.Assert(index < 4); + + if (index != 0) + { + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4)); + } + else + { + if (src1.GetRegister() == src2.GetRegister()) + { + int mask = 0b11_10_01_00; + + mask &= ~(0b11 << index * 2); + + context.Assembler.Pshufd(dest, src1, (byte)mask); + } + else + { + int mask0 = 0b11_10_01_00; + int mask1 = 0b11_10_01_00; + + mask0 = BitUtils.RotateRight(mask0, index * 2, 8); + mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8); + + context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0. + context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0] + context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position. + + if (dest.GetRegister() != src1.GetRegister()) + { + context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1. 
+ } + } + } + } + else + { + context.Assembler.Movss(dest, src1, src2); + } + } + else /* if (src2.Type == OperandType.FP64) */ + { + Debug.Assert(index < 2); + + if (index != 0) + { + context.Assembler.Movlhps(dest, src1, src2); + } + else + { + context.Assembler.Movsd(dest, src1, src2); + } + } + } + + private static void GenerateVectorInsert16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Pinsrw(dest, src1, src2, index); + } + + private static void GenerateVectorInsert8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + // It's not possible to emulate this instruction without + // SSE 4.1 support without the use of a temporary register, + // so we instead handle that case on the pre-allocator when + // SSE 4.1 is not supported on the CPU. 
+ Debug.Assert(HardwareCapabilities.SupportsSse41); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Pinsrb(dest, src1, src2, index); + } + + private static void GenerateVectorOne(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.Pcmpeqw(dest, dest, dest); + } + + private static void GenerateVectorZero(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.Xorps(dest, dest, dest); + } + + private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + GenerateZeroUpper64(context, dest, source); + } + + private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + GenerateZeroUpper96(context, dest, source); + } + + private static void GenerateZeroExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movzx16(dest, source, OperandType.I32); + } + + private static void GenerateZeroExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + // We can eliminate 
the move if source is already 32-bit and the registers are the same. + if (dest.Value == source.Value && source.Type == OperandType.I32) + { + return; + } + + context.Assembler.Mov(dest, source, OperandType.I32); + } + + private static void GenerateZeroExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movzx8(dest, source, OperandType.I32); + } + + private static void GenerateLoad(CodeGenContext context, Operand address, Operand value) + { + switch (value.Type) + { + case OperandType.I32: context.Assembler.Mov (value, address, OperandType.I32); break; + case OperandType.I64: context.Assembler.Mov (value, address, OperandType.I64); break; + case OperandType.FP32: context.Assembler.Movd (value, address); break; + case OperandType.FP64: context.Assembler.Movq (value, address); break; + case OperandType.V128: context.Assembler.Movdqu(value, address); break; + + default: Debug.Assert(false); break; + } + } + + private static void GenerateStore(CodeGenContext context, Operand address, Operand value) + { + switch (value.Type) + { + case OperandType.I32: context.Assembler.Mov (address, value, OperandType.I32); break; + case OperandType.I64: context.Assembler.Mov (address, value, OperandType.I64); break; + case OperandType.FP32: context.Assembler.Movd (address, value); break; + case OperandType.FP64: context.Assembler.Movq (address, value); break; + case OperandType.V128: context.Assembler.Movdqu(address, value); break; + + default: Debug.Assert(false); break; + } + } + + private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source) + { + context.Assembler.Movq(dest, source); + } + + private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source) + { + context.Assembler.Movq(dest, source); + context.Assembler.Pshufd(dest, dest, 0xfc); + 
} + + private static bool MatchOperation(Operation node, Instruction inst, OperandType destType, Register destReg) + { + if (node == default || node.DestinationsCount == 0) + { + return false; + } + + if (node.Instruction != inst) + { + return false; + } + + Operand dest = node.Destination; + + return dest.Kind == OperandKind.Register && + dest.Type == destType && + dest.GetRegister() == destReg; + } + + [Conditional("DEBUG")] + private static void ValidateUnOp(Operand dest, Operand source) + { + EnsureSameReg (dest, source); + EnsureSameType(dest, source); + } + + [Conditional("DEBUG")] + private static void ValidateBinOp(Operand dest, Operand src1, Operand src2) + { + EnsureSameReg (dest, src1); + EnsureSameType(dest, src1, src2); + } + + [Conditional("DEBUG")] + private static void ValidateShift(Operand dest, Operand src1, Operand src2) + { + EnsureSameReg (dest, src1); + EnsureSameType(dest, src1); + + Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32); + } + + private static void EnsureSameReg(Operand op1, Operand op2) + { + if (!op1.Type.IsInteger() && HardwareCapabilities.SupportsVexEncoding) + { + return; + } + + Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory); + Debug.Assert(op1.Kind == op2.Kind); + Debug.Assert(op1.Value == op2.Value); + } + + private static void EnsureSameType(Operand op1, Operand op2) + { + Debug.Assert(op1.Type == op2.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + Debug.Assert(op1.Type == op4.Type); + } + + private static UnwindInfo WritePrologue(CodeGenContext context) + { + List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>(); + + Operand rsp = Register(X86Register.Rsp); + + int 
mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + + while (mask != 0) + { + int bit = BitOperations.TrailingZeroCount(mask); + + context.Assembler.Push(Register((X86Register)bit)); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: bit)); + + mask &= ~(1 << bit); + } + + int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize; + + reservedStackSize += context.XmmSaveRegionSize; + + if (reservedStackSize >= StackGuardSize) + { + GenerateInlineStackProbe(context, reservedStackSize); + } + + if (reservedStackSize != 0) + { + context.Assembler.Sub(rsp, Const(reservedStackSize), OperandType.I64); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.AllocStack, context.StreamOffset, stackOffsetOrAllocSize: reservedStackSize)); + } + + int offset = reservedStackSize; + + mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + while (mask != 0) + { + int bit = BitOperations.TrailingZeroCount(mask); + + offset -= 16; + + Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset); + + context.Assembler.Movdqu(memOp, Xmm((X86Register)bit)); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.SaveXmm128, context.StreamOffset, bit, offset)); + + mask &= ~(1 << bit); + } + + return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset); + } + + private static void WriteEpilogue(CodeGenContext context) + { + Operand rsp = Register(X86Register.Rsp); + + int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize; + + reservedStackSize += context.XmmSaveRegionSize; + + int offset = reservedStackSize; + + int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + while (mask != 0) + { + int bit = BitOperations.TrailingZeroCount(mask); + + offset -= 16; + + Operand memOp = MemoryOp(OperandType.V128, rsp, default, 
Multiplier.x1, offset); + + context.Assembler.Movdqu(Xmm((X86Register)bit), memOp); + + mask &= ~(1 << bit); + } + + if (reservedStackSize != 0) + { + context.Assembler.Add(rsp, Const(reservedStackSize), OperandType.I64); + } + + mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + + while (mask != 0) + { + int bit = BitUtils.HighestBitSet(mask); + + context.Assembler.Pop(Register((X86Register)bit)); + + mask &= ~(1 << bit); + } + } + + private static void GenerateInlineStackProbe(CodeGenContext context, int size) + { + // Windows does lazy stack allocation, and there are just 2 + // guard pages on the end of the stack. So, if the allocation + // size we make is greater than this guard size, we must ensure + // that the OS will map all pages that we'll use. We do that by + // doing a dummy read on those pages, forcing a page fault and + // the OS to map them. If they are already mapped, nothing happens. + const int pageMask = PageSize - 1; + + size = (size + pageMask) & ~pageMask; + + Operand rsp = Register(X86Register.Rsp); + Operand temp = Register(CallingConvention.GetIntReturnRegister()); + + for (int offset = PageSize; offset < size; offset += PageSize) + { + Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, -offset); + + context.Assembler.Mov(temp, memOp, OperandType.I32); + } + } + + private static Operand Memory(Operand operand, OperandType type) + { + if (operand.Kind == OperandKind.Memory) + { + return operand; + } + + return MemoryOp(type, operand); + } + + private static Operand Register(X86Register register, OperandType type = OperandType.I64) + { + return Operand.Factory.Register((int)register, RegisterType.Integer, type); + } + + private static Operand Xmm(X86Register register) + { + return Operand.Factory.Register((int)register, RegisterType.Vector, OperandType.V128); + } + } +}
// src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
using Ryujinx.Memory;
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;

namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Host CPU feature detection for the x86 code generator.
    /// The static constructor reads CPUID leaves 1 and 7 and XCR0 once; the <c>Supports*</c>
    /// properties are derived from those cached values.
    /// When <see cref="X86Base.IsSupported"/> is false nothing is queried and every flag stays zero.
    /// </summary>
    static class HardwareCapabilities
    {
        /// <summary>Signature of the small native stub used to execute <c>xgetbv</c>.</summary>
        private delegate uint GetXcr0();

        static HardwareCapabilities()
        {
            if (!X86Base.IsSupported)
            {
                return;
            }

            // Leaf 0: EAX holds the highest supported standard CPUID leaf.
            (int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000);

            (_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000);
            FeatureInfo1Edx = (FeatureFlags1Edx)edx1;
            FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1;

            // Leaf 7 may only be queried when the CPU reports it as available.
            if (maxNum >= 7)
            {
                (_, int ebx7, int ecx7, _) = X86Base.CpuId(0x00000007, 0x00000000);
                FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
                FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
            }

            // Must run after FeatureInfo1Ecx is set, since GetXcr0Eax checks it.
            Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax();
        }

        /// <summary>
        /// Executes <c>xgetbv</c> with ECX = 0 through a temporary executable stub and returns the
        /// 32-bit result of that stub.
        /// </summary>
        /// <returns>
        /// The value returned by the stub, or 0 when the CPU does not report both XSAVE and OSXSAVE.
        /// </returns>
        private static uint GetXcr0Eax()
        {
            // xgetbv faults with #UD unless the CPU implements XSAVE *and* the OS has enabled it
            // (CR4.OSXSAVE, mirrored by the CPUID OSXSAVE bit). Checking XSAVE alone is not enough.
            if (!FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave))
            {
                return 0;
            }

            ReadOnlySpan<byte> asmGetXcr0 = new byte[]
            {
                0x31, 0xc9, // xor ecx, ecx
                0xf, 0x01, 0xd0, // xgetbv
                0xc3, // ret
            };

            using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length);

            memGetXcr0.Write(0, asmGetXcr0);

            memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);

            var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);

            return fGetXcr0();
        }

        /// <summary>Feature bits taken from EDX of CPUID leaf 1.</summary>
        [Flags]
        public enum FeatureFlags1Edx
        {
            Sse = 1 << 25,
            Sse2 = 1 << 26
        }

        /// <summary>Feature bits taken from ECX of CPUID leaf 1.</summary>
        [Flags]
        public enum FeatureFlags1Ecx
        {
            Sse3 = 1 << 0,
            Pclmulqdq = 1 << 1,
            Ssse3 = 1 << 9,
            Fma = 1 << 12,
            Sse41 = 1 << 19,
            Sse42 = 1 << 20,
            Popcnt = 1 << 23,
            Aes = 1 << 25,
            Xsave = 1 << 26,
            Osxsave = 1 << 27,
            Avx = 1 << 28,
            F16c = 1 << 29
        }

        /// <summary>Feature bits taken from EBX of CPUID leaf 7, sub-leaf 0.</summary>
        [Flags]
        public enum FeatureFlags7Ebx
        {
            Avx2 = 1 << 5,
            Avx512f = 1 << 16,
            Avx512dq = 1 << 17,
            Sha = 1 << 29,
            Avx512bw = 1 << 30,
            Avx512vl = 1 << 31
        }

        /// <summary>Feature bits taken from ECX of CPUID leaf 7, sub-leaf 0.</summary>
        [Flags]
        public enum FeatureFlags7Ecx
        {
            Gfni = 1 << 8,
        }

        /// <summary>State-component bits of the value produced by <see cref="GetXcr0Eax"/>.</summary>
        [Flags]
        public enum Xcr0FlagsEax
        {
            Sse = 1 << 1,
            YmmHi128 = 1 << 2,
            Opmask = 1 << 5,
            ZmmHi256 = 1 << 6,
            Hi16Zmm = 1 << 7
        }

        public static FeatureFlags1Edx FeatureInfo1Edx { get; }
        public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
        public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
        public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0;
        public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0;

        public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
        public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
        public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3);
        public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq);
        public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3);
        public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma);
        public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41);
        public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
        public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
        public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);

        // AVX needs the CPU bit, OS XSAVE support, and the SSE + upper-YMM state enabled in XCR0.
        public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128);
        public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;

        // AVX-512 additionally needs the opmask and both ZMM state components enabled in XCR0.
        public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave)
            && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm);
        public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F;
        public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F;
        public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F;
        public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
        public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
        public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni);

        /// <summary>
        /// When set, <see cref="SupportsVexEncoding"/> and <see cref="SupportsEvexEncoding"/> report
        /// false regardless of what the CPU supports.
        /// </summary>
        public static bool ForceLegacySse { get; set; }

        public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
        public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse;
    }
}
// src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Immutable pair of an <see cref="X86Instruction"/> and the <see cref="IntrinsicType"/>
    /// category it is registered under.
    /// </summary>
    readonly struct IntrinsicInfo
    {
        /// <summary>Creates an entry from the given instruction and category.</summary>
        /// <param name="inst">Value stored in <see cref="Inst"/>.</param>
        /// <param name="type">Value stored in <see cref="Type"/>.</param>
        public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
        {
            (Inst, Type) = (inst, type);
        }

        /// <summary>Instruction passed to the constructor.</summary>
        public X86Instruction Inst { get; }

        /// <summary>Category passed to the constructor.</summary>
        public IntrinsicType Type { get; }
    }
}
// src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
using ARMeilleure.Common;
using ARMeilleure.IntermediateRepresentation;

namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Lookup table from <see cref="Intrinsic"/> to its <see cref="IntrinsicInfo"/>.
    /// The table is indexed by the integer value of the intrinsic and has one slot per
    /// <see cref="Intrinsic"/> member (as counted by <c>EnumUtils.GetCount</c>); slots that are
    /// never registered below keep <c>default(IntrinsicInfo)</c>.
    /// </summary>
    static class IntrinsicTable
    {
        private static readonly IntrinsicInfo[] _intrinTable;

        static IntrinsicTable()
        {
            _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];

            Map(Intrinsic.X86Addpd, X86Instruction.Addpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Addps, X86Instruction.Addps, IntrinsicType.Binary);
            Map(Intrinsic.X86Addsd, X86Instruction.Addsd, IntrinsicType.Binary);
            Map(Intrinsic.X86Addss, X86Instruction.Addss, IntrinsicType.Binary);
            Map(Intrinsic.X86Aesdec, X86Instruction.Aesdec, IntrinsicType.Binary);
            Map(Intrinsic.X86Aesdeclast, X86Instruction.Aesdeclast, IntrinsicType.Binary);
            Map(Intrinsic.X86Aesenc, X86Instruction.Aesenc, IntrinsicType.Binary);
            Map(Intrinsic.X86Aesenclast, X86Instruction.Aesenclast, IntrinsicType.Binary);
            Map(Intrinsic.X86Aesimc, X86Instruction.Aesimc, IntrinsicType.Unary);
            Map(Intrinsic.X86Andnpd, X86Instruction.Andnpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Andnps, X86Instruction.Andnps, IntrinsicType.Binary);
            Map(Intrinsic.X86Andpd, X86Instruction.Andpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Andps, X86Instruction.Andps, IntrinsicType.Binary);
            Map(Intrinsic.X86Blendvpd, X86Instruction.Blendvpd, IntrinsicType.Ternary);
            Map(Intrinsic.X86Blendvps, X86Instruction.Blendvps, IntrinsicType.Ternary);
            Map(Intrinsic.X86Cmppd, X86Instruction.Cmppd, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Cmpps, X86Instruction.Cmpps, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Cmpsd, X86Instruction.Cmpsd, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Cmpss, X86Instruction.Cmpss, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Comisdeq, X86Instruction.Comisd, IntrinsicType.Comis_);
            Map(Intrinsic.X86Comisdge, X86Instruction.Comisd, IntrinsicType.Comis_);
            Map(Intrinsic.X86Comisdlt, X86Instruction.Comisd, IntrinsicType.Comis_);
            Map(Intrinsic.X86Comisseq, X86Instruction.Comiss, IntrinsicType.Comis_);
            Map(Intrinsic.X86Comissge, X86Instruction.Comiss, IntrinsicType.Comis_);
            Map(Intrinsic.X86Comisslt, X86Instruction.Comiss, IntrinsicType.Comis_);
            Map(Intrinsic.X86Crc32, X86Instruction.Crc32, IntrinsicType.Crc32);
            Map(Intrinsic.X86Crc32_16, X86Instruction.Crc32_16, IntrinsicType.Crc32);
            Map(Intrinsic.X86Crc32_8, X86Instruction.Crc32_8, IntrinsicType.Crc32);
            Map(Intrinsic.X86Cvtdq2pd, X86Instruction.Cvtdq2pd, IntrinsicType.Unary);
            Map(Intrinsic.X86Cvtdq2ps, X86Instruction.Cvtdq2ps, IntrinsicType.Unary);
            Map(Intrinsic.X86Cvtpd2dq, X86Instruction.Cvtpd2dq, IntrinsicType.Unary);
            Map(Intrinsic.X86Cvtpd2ps, X86Instruction.Cvtpd2ps, IntrinsicType.Unary);
            Map(Intrinsic.X86Cvtps2dq, X86Instruction.Cvtps2dq, IntrinsicType.Unary);
            Map(Intrinsic.X86Cvtps2pd, X86Instruction.Cvtps2pd, IntrinsicType.Unary);
            Map(Intrinsic.X86Cvtsd2si, X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr);
            Map(Intrinsic.X86Cvtsd2ss, X86Instruction.Cvtsd2ss, IntrinsicType.Binary);
            Map(Intrinsic.X86Cvtsi2sd, X86Instruction.Cvtsi2sd, IntrinsicType.BinaryGpr);
            Map(Intrinsic.X86Cvtsi2si, X86Instruction.Movd, IntrinsicType.UnaryToGpr);
            Map(Intrinsic.X86Cvtsi2ss, X86Instruction.Cvtsi2ss, IntrinsicType.BinaryGpr);
            Map(Intrinsic.X86Cvtss2sd, X86Instruction.Cvtss2sd, IntrinsicType.Binary);
            Map(Intrinsic.X86Cvtss2si, X86Instruction.Cvtss2si, IntrinsicType.UnaryToGpr);
            Map(Intrinsic.X86Divpd, X86Instruction.Divpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Divps, X86Instruction.Divps, IntrinsicType.Binary);
            Map(Intrinsic.X86Divsd, X86Instruction.Divsd, IntrinsicType.Binary);
            Map(Intrinsic.X86Divss, X86Instruction.Divss, IntrinsicType.Binary);
            Map(Intrinsic.X86Gf2p8affineqb, X86Instruction.Gf2p8affineqb, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Haddpd, X86Instruction.Haddpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Haddps, X86Instruction.Haddps, IntrinsicType.Binary);
            Map(Intrinsic.X86Insertps, X86Instruction.Insertps, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Ldmxcsr, X86Instruction.None, IntrinsicType.Mxcsr);
            Map(Intrinsic.X86Maxpd, X86Instruction.Maxpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Maxps, X86Instruction.Maxps, IntrinsicType.Binary);
            Map(Intrinsic.X86Maxsd, X86Instruction.Maxsd, IntrinsicType.Binary);
            Map(Intrinsic.X86Maxss, X86Instruction.Maxss, IntrinsicType.Binary);
            Map(Intrinsic.X86Minpd, X86Instruction.Minpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Minps, X86Instruction.Minps, IntrinsicType.Binary);
            Map(Intrinsic.X86Minsd, X86Instruction.Minsd, IntrinsicType.Binary);
            Map(Intrinsic.X86Minss, X86Instruction.Minss, IntrinsicType.Binary);
            Map(Intrinsic.X86Movhlps, X86Instruction.Movhlps, IntrinsicType.Binary);
            Map(Intrinsic.X86Movlhps, X86Instruction.Movlhps, IntrinsicType.Binary);
            Map(Intrinsic.X86Movss, X86Instruction.Movss, IntrinsicType.Binary);
            Map(Intrinsic.X86Mulpd, X86Instruction.Mulpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Mulps, X86Instruction.Mulps, IntrinsicType.Binary);
            Map(Intrinsic.X86Mulsd, X86Instruction.Mulsd, IntrinsicType.Binary);
            Map(Intrinsic.X86Mulss, X86Instruction.Mulss, IntrinsicType.Binary);
            Map(Intrinsic.X86Paddb, X86Instruction.Paddb, IntrinsicType.Binary);
            Map(Intrinsic.X86Paddd, X86Instruction.Paddd, IntrinsicType.Binary);
            Map(Intrinsic.X86Paddq, X86Instruction.Paddq, IntrinsicType.Binary);
            Map(Intrinsic.X86Paddw, X86Instruction.Paddw, IntrinsicType.Binary);
            Map(Intrinsic.X86Palignr, X86Instruction.Palignr, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Pand, X86Instruction.Pand, IntrinsicType.Binary);
            Map(Intrinsic.X86Pandn, X86Instruction.Pandn, IntrinsicType.Binary);
            Map(Intrinsic.X86Pavgb, X86Instruction.Pavgb, IntrinsicType.Binary);
            Map(Intrinsic.X86Pavgw, X86Instruction.Pavgw, IntrinsicType.Binary);
            Map(Intrinsic.X86Pblendvb, X86Instruction.Pblendvb, IntrinsicType.Ternary);
            Map(Intrinsic.X86Pclmulqdq, X86Instruction.Pclmulqdq, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Pcmpeqb, X86Instruction.Pcmpeqb, IntrinsicType.Binary);
            Map(Intrinsic.X86Pcmpeqd, X86Instruction.Pcmpeqd, IntrinsicType.Binary);
            Map(Intrinsic.X86Pcmpeqq, X86Instruction.Pcmpeqq, IntrinsicType.Binary);
            Map(Intrinsic.X86Pcmpeqw, X86Instruction.Pcmpeqw, IntrinsicType.Binary);
            Map(Intrinsic.X86Pcmpgtb, X86Instruction.Pcmpgtb, IntrinsicType.Binary);
            Map(Intrinsic.X86Pcmpgtd, X86Instruction.Pcmpgtd, IntrinsicType.Binary);
            Map(Intrinsic.X86Pcmpgtq, X86Instruction.Pcmpgtq, IntrinsicType.Binary);
            Map(Intrinsic.X86Pcmpgtw, X86Instruction.Pcmpgtw, IntrinsicType.Binary);
            Map(Intrinsic.X86Pmaxsb, X86Instruction.Pmaxsb, IntrinsicType.Binary);
            Map(Intrinsic.X86Pmaxsd, X86Instruction.Pmaxsd, IntrinsicType.Binary);
            Map(Intrinsic.X86Pmaxsw, X86Instruction.Pmaxsw, IntrinsicType.Binary);
            Map(Intrinsic.X86Pmaxub, X86Instruction.Pmaxub, IntrinsicType.Binary);
            Map(Intrinsic.X86Pmaxud, X86Instruction.Pmaxud, IntrinsicType.Binary);
            Map(Intrinsic.X86Pmaxuw, X86Instruction.Pmaxuw, IntrinsicType.Binary);
            Map(Intrinsic.X86Pminsb, X86Instruction.Pminsb, IntrinsicType.Binary);
            Map(Intrinsic.X86Pminsd, X86Instruction.Pminsd, IntrinsicType.Binary);
            Map(Intrinsic.X86Pminsw, X86Instruction.Pminsw, IntrinsicType.Binary);
            Map(Intrinsic.X86Pminub, X86Instruction.Pminub, IntrinsicType.Binary);
            Map(Intrinsic.X86Pminud, X86Instruction.Pminud, IntrinsicType.Binary);
            Map(Intrinsic.X86Pminuw, X86Instruction.Pminuw, IntrinsicType.Binary);
            Map(Intrinsic.X86Pmovsxbw, X86Instruction.Pmovsxbw, IntrinsicType.Unary);
            Map(Intrinsic.X86Pmovsxdq, X86Instruction.Pmovsxdq, IntrinsicType.Unary);
            Map(Intrinsic.X86Pmovsxwd, X86Instruction.Pmovsxwd, IntrinsicType.Unary);
            Map(Intrinsic.X86Pmovzxbw, X86Instruction.Pmovzxbw, IntrinsicType.Unary);
            Map(Intrinsic.X86Pmovzxdq, X86Instruction.Pmovzxdq, IntrinsicType.Unary);
            Map(Intrinsic.X86Pmovzxwd, X86Instruction.Pmovzxwd, IntrinsicType.Unary);
            Map(Intrinsic.X86Pmulld, X86Instruction.Pmulld, IntrinsicType.Binary);
            Map(Intrinsic.X86Pmullw, X86Instruction.Pmullw, IntrinsicType.Binary);
            Map(Intrinsic.X86Popcnt, X86Instruction.Popcnt, IntrinsicType.PopCount);
            Map(Intrinsic.X86Por, X86Instruction.Por, IntrinsicType.Binary);
            Map(Intrinsic.X86Pshufb, X86Instruction.Pshufb, IntrinsicType.Binary);
            Map(Intrinsic.X86Pshufd, X86Instruction.Pshufd, IntrinsicType.BinaryImm);
            Map(Intrinsic.X86Pslld, X86Instruction.Pslld, IntrinsicType.Binary);
            Map(Intrinsic.X86Pslldq, X86Instruction.Pslldq, IntrinsicType.Binary);
            Map(Intrinsic.X86Psllq, X86Instruction.Psllq, IntrinsicType.Binary);
            Map(Intrinsic.X86Psllw, X86Instruction.Psllw, IntrinsicType.Binary);
            Map(Intrinsic.X86Psrad, X86Instruction.Psrad, IntrinsicType.Binary);
            Map(Intrinsic.X86Psraw, X86Instruction.Psraw, IntrinsicType.Binary);
            Map(Intrinsic.X86Psrld, X86Instruction.Psrld, IntrinsicType.Binary);
            Map(Intrinsic.X86Psrlq, X86Instruction.Psrlq, IntrinsicType.Binary);
            Map(Intrinsic.X86Psrldq, X86Instruction.Psrldq, IntrinsicType.Binary);
            Map(Intrinsic.X86Psrlw, X86Instruction.Psrlw, IntrinsicType.Binary);
            Map(Intrinsic.X86Psubb, X86Instruction.Psubb, IntrinsicType.Binary);
            Map(Intrinsic.X86Psubd, X86Instruction.Psubd, IntrinsicType.Binary);
            Map(Intrinsic.X86Psubq, X86Instruction.Psubq, IntrinsicType.Binary);
            Map(Intrinsic.X86Psubw, X86Instruction.Psubw, IntrinsicType.Binary);
            Map(Intrinsic.X86Punpckhbw, X86Instruction.Punpckhbw, IntrinsicType.Binary);
            Map(Intrinsic.X86Punpckhdq, X86Instruction.Punpckhdq, IntrinsicType.Binary);
            Map(Intrinsic.X86Punpckhqdq, X86Instruction.Punpckhqdq, IntrinsicType.Binary);
            Map(Intrinsic.X86Punpckhwd, X86Instruction.Punpckhwd, IntrinsicType.Binary);
            Map(Intrinsic.X86Punpcklbw, X86Instruction.Punpcklbw, IntrinsicType.Binary);
            Map(Intrinsic.X86Punpckldq, X86Instruction.Punpckldq, IntrinsicType.Binary);
            Map(Intrinsic.X86Punpcklqdq, X86Instruction.Punpcklqdq, IntrinsicType.Binary);
            Map(Intrinsic.X86Punpcklwd, X86Instruction.Punpcklwd, IntrinsicType.Binary);
            Map(Intrinsic.X86Pxor, X86Instruction.Pxor, IntrinsicType.Binary);
            Map(Intrinsic.X86Rcpps, X86Instruction.Rcpps, IntrinsicType.Unary);
            Map(Intrinsic.X86Rcpss, X86Instruction.Rcpss, IntrinsicType.Unary);
            Map(Intrinsic.X86Roundpd, X86Instruction.Roundpd, IntrinsicType.BinaryImm);
            Map(Intrinsic.X86Roundps, X86Instruction.Roundps, IntrinsicType.BinaryImm);
            Map(Intrinsic.X86Roundsd, X86Instruction.Roundsd, IntrinsicType.BinaryImm);
            Map(Intrinsic.X86Roundss, X86Instruction.Roundss, IntrinsicType.BinaryImm);
            Map(Intrinsic.X86Rsqrtps, X86Instruction.Rsqrtps, IntrinsicType.Unary);
            Map(Intrinsic.X86Rsqrtss, X86Instruction.Rsqrtss, IntrinsicType.Unary);
            Map(Intrinsic.X86Sha256Msg1, X86Instruction.Sha256Msg1, IntrinsicType.Binary);
            Map(Intrinsic.X86Sha256Msg2, X86Instruction.Sha256Msg2, IntrinsicType.Binary);
            Map(Intrinsic.X86Sha256Rnds2, X86Instruction.Sha256Rnds2, IntrinsicType.Ternary);
            Map(Intrinsic.X86Shufpd, X86Instruction.Shufpd, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Shufps, X86Instruction.Shufps, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Sqrtpd, X86Instruction.Sqrtpd, IntrinsicType.Unary);
            Map(Intrinsic.X86Sqrtps, X86Instruction.Sqrtps, IntrinsicType.Unary);
            Map(Intrinsic.X86Sqrtsd, X86Instruction.Sqrtsd, IntrinsicType.Unary);
            Map(Intrinsic.X86Sqrtss, X86Instruction.Sqrtss, IntrinsicType.Unary);
            Map(Intrinsic.X86Stmxcsr, X86Instruction.None, IntrinsicType.Mxcsr);
            Map(Intrinsic.X86Subpd, X86Instruction.Subpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Subps, X86Instruction.Subps, IntrinsicType.Binary);
            Map(Intrinsic.X86Subsd, X86Instruction.Subsd, IntrinsicType.Binary);
            Map(Intrinsic.X86Subss, X86Instruction.Subss, IntrinsicType.Binary);
            Map(Intrinsic.X86Unpckhpd, X86Instruction.Unpckhpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Unpckhps, X86Instruction.Unpckhps, IntrinsicType.Binary);
            Map(Intrinsic.X86Unpcklpd, X86Instruction.Unpcklpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Unpcklps, X86Instruction.Unpcklps, IntrinsicType.Binary);
            Map(Intrinsic.X86Vcvtph2ps, X86Instruction.Vcvtph2ps, IntrinsicType.Unary);
            Map(Intrinsic.X86Vcvtps2ph, X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm);
            Map(Intrinsic.X86Vfmadd231pd, X86Instruction.Vfmadd231pd, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfmadd231ps, X86Instruction.Vfmadd231ps, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfmadd231sd, X86Instruction.Vfmadd231sd, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfmadd231ss, X86Instruction.Vfmadd231ss, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfmsub231sd, X86Instruction.Vfmsub231sd, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfmsub231ss, X86Instruction.Vfmsub231ss, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfnmadd231pd, X86Instruction.Vfnmadd231pd, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfnmadd231ps, X86Instruction.Vfnmadd231ps, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfnmadd231sd, X86Instruction.Vfnmadd231sd, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfnmadd231ss, X86Instruction.Vfnmadd231ss, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfnmsub231sd, X86Instruction.Vfnmsub231sd, IntrinsicType.Fma);
            Map(Intrinsic.X86Vfnmsub231ss, X86Instruction.Vfnmsub231ss, IntrinsicType.Fma);
            Map(Intrinsic.X86Vpternlogd, X86Instruction.Vpternlogd, IntrinsicType.TernaryImm);
            Map(Intrinsic.X86Xorpd, X86Instruction.Xorpd, IntrinsicType.Binary);
            Map(Intrinsic.X86Xorps, X86Instruction.Xorps, IntrinsicType.Binary);
        }

        /// <summary>Stores the (instruction, category) pair in the slot of <paramref name="intrin"/>.</summary>
        private static void Map(Intrinsic intrin, X86Instruction inst, IntrinsicType type)
        {
            _intrinTable[(int)intrin] = new IntrinsicInfo(inst, type);
        }

        /// <summary>Returns the table entry stored at the integer value of <paramref name="intrin"/>.</summary>
        /// <param name="intrin">Intrinsic to look up.</param>
        /// <returns>The registered entry, or <c>default(IntrinsicInfo)</c> if none was registered.</returns>
        public static IntrinsicInfo GetInfo(Intrinsic intrin)
        {
            int slot = (int)intrin;

            return _intrinTable[slot];
        }
    }
}
// src/ARMeilleure/CodeGen/X86/IntrinsicType.cs
namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Category tag paired with an <c>X86Instruction</c> inside each <c>IntrinsicInfo</c> entry of the
    /// intrinsic table. The pre-allocator inspects it (for example <see cref="Crc32"/> and
    /// <see cref="Fma"/>) to decide how an intrinsic's operands must be prepared.
    /// </summary>
    /// <remarks>
    /// The numeric values are spelled out explicitly; they are the same values the compiler
    /// would assign implicitly from the declaration order.
    /// </remarks>
    enum IntrinsicType
    {
        Comis_ = 0,
        Mxcsr = 1,
        PopCount = 2,
        Unary = 3,
        UnaryToGpr = 4,
        Binary = 5,
        BinaryGpr = 6,
        BinaryImm = 7,
        Crc32 = 8,
        Ternary = 9,
        TernaryImm = 10,
        Fma = 11,
    }
}
// src/ARMeilleure/CodeGen/X86/Mxcsr.cs
// src/ARMeilleure/CodeGen/X86/PreAllocator.cs
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

// ---- Mxcsr.cs ----
namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Single-bit masks for the MXCSR control fields listed on each member.
    /// </summary>
    [Flags]
    enum Mxcsr
    {
        Ftz = 0x8000, // Flush To Zero (bit 15).
        Rhi = 0x4000, // Round Mode high bit (bit 14).
        Rlo = 0x2000, // Round Mode low bit (bit 13).
        Um = 0x0800,  // Underflow Mask (bit 11).
        Dm = 0x0100,  // Denormal Mask (bit 8).
        Daz = 0x0040, // Denormals Are Zero (bit 6).
    }
}

// ---- PreAllocator.cs ----
namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Rewrites IR operations before register allocation so that every operation satisfies the
    /// operand restrictions of the X86 instruction it will be lowered to: constants are moved to
    /// locals where no immediate form exists, two-operand (destructive) forms get their
    /// destination seeded from the first source, and fixed-register operands are routed through
    /// the required physical registers.
    /// </summary>
    class PreAllocator
    {
        /// <summary>
        /// Runs the pre-allocation pass over every operation of every block in the function.
        /// </summary>
        /// <param name="cctx">Compiler context holding the control flow graph to rewrite.</param>
        /// <param name="stackAlloc">Stack allocator used for scratch slots (MXCSR access, Windows call copies).</param>
        /// <param name="maxCallArgs">
        /// Receives the largest argument count seen on any call, or -1 when the function has no calls.
        /// </param>
        public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
        {
            maxCallArgs = -1;

            Span<Operation> buffer = default;

            bool isWindows = CallingConvention.GetCurrentCallConv() == CallConvName.Windows;

            Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];

            BasicBlock block = cctx.Cfg.Blocks.First;

            while (block != null)
            {
                IntrusiveList<Operation> ops = block.Operations;

                Operation node = ops.First;

                while (node != default)
                {
                    // Read the successor first: the handlers below may unlink or replace this node.
                    Operation nextNode = node.ListNext;

                    if (node.Instruction != Instruction.Phi)
                    {
                        InsertConstantRegCopies(ops, node);
                        InsertDestructiveRegCopies(ops, node);
                        InsertConstrainedRegCopies(ops, node);

                        switch (node.Instruction)
                        {
                            case Instruction.Call:
                            {
                                // Track the widest call. A V128 result counts as one extra argument
                                // because, on Windows, the caller passes a pointer to the location
                                // where the returned struct must be written as the first argument.
                                bool returnsVector = node.Destination != default &&
                                                     node.Destination.Type == OperandType.V128;

                                int argsCount = node.SourcesCount - 1 + (returnsVector ? 1 : 0);

                                maxCallArgs = Math.Max(maxCallArgs, argsCount);

                                // Move the arguments to where the ABI expects them.
                                if (isWindows)
                                {
                                    PreAllocatorWindows.InsertCallCopies(ops, stackAlloc, node);
                                }
                                else
                                {
                                    // System V.
                                    PreAllocatorSystemV.InsertCallCopies(ops, node);
                                }

                                break;
                            }

                            case Instruction.ConvertToFPUI:
                            {
                                GenerateConvertToFPUI(ops, node);

                                break;
                            }

                            case Instruction.LoadArgument:
                            {
                                nextNode = isWindows
                                    ? PreAllocatorWindows.InsertLoadArgumentCopy(cctx, ref buffer, ops, preservedArgs, node)
                                    : PreAllocatorSystemV.InsertLoadArgumentCopy(cctx, ref buffer, ops, preservedArgs, node);

                                break;
                            }

                            case Instruction.Negate:
                            {
                                // Integer negation is left untouched; only FP negation is expanded.
                                if (!node.GetSource(0).Type.IsInteger())
                                {
                                    GenerateNegate(ops, node);
                                }

                                break;
                            }

                            case Instruction.Return:
                            {
                                if (isWindows)
                                {
                                    PreAllocatorWindows.InsertReturnCopy(cctx, ops, preservedArgs, node);
                                }
                                else
                                {
                                    // System V.
                                    PreAllocatorSystemV.InsertReturnCopy(ops, node);
                                }

                                break;
                            }

                            case Instruction.Tailcall:
                            {
                                if (isWindows)
                                {
                                    PreAllocatorWindows.InsertTailcallCopies(ops, stackAlloc, node);
                                }
                                else
                                {
                                    PreAllocatorSystemV.InsertTailcallCopies(ops, stackAlloc, node);
                                }

                                break;
                            }

                            case Instruction.VectorInsert8:
                            {
                                if (!HardwareCapabilities.SupportsSse41)
                                {
                                    GenerateVectorInsert8(ops, node);
                                }

                                break;
                            }

                            case Instruction.Extended:
                            {
                                // LDMXCSR/STMXCSR get a 32-bit stack slot; its offset becomes the
                                // first source of the operation.
                                if (node.Intrinsic == Intrinsic.X86Ldmxcsr)
                                {
                                    int slot = stackAlloc.Allocate(OperandType.I32);

                                    node.SetSources(new Operand[] { Const(slot), node.GetSource(0) });
                                }
                                else if (node.Intrinsic == Intrinsic.X86Stmxcsr)
                                {
                                    int slot = stackAlloc.Allocate(OperandType.I32);

                                    node.SetSources(new Operand[] { Const(slot) });
                                }

                                break;
                            }
                        }
                    }

                    node = nextNode;
                }

                block = block.ListNext;
            }
        }

        /// <summary>
        /// Replaces constant sources that the target instruction cannot encode as an immediate
        /// with locals initialized by copies inserted before <paramref name="node"/>.
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="node">Operation whose first two sources are inspected.</param>
        protected static void InsertConstantRegCopies(IntrusiveList<Operation> nodes, Operation node)
        {
            if (node.SourcesCount == 0 || IsXmmIntrinsic(node))
            {
                return;
            }

            Instruction inst = node.Instruction;

            Operand lhs = node.GetSource(0);

            if (lhs.Kind == OperandKind.Constant)
            {
                if (!lhs.Type.IsInteger())
                {
                    // FP32, FP64 and V128 constants have no immediate form: the value is
                    // materialized as an integer on a GPR, moved to an XMM register, and the
                    // operation then reads that register instead.
                    node.SetSource(0, AddXmmCopy(nodes, node, lhs));
                }
                else if (!HasConstSrc1(inst))
                {
                    // Integer immediates are generally only encodable on the second operand.
                    // For a commutative operation, swapping the operands may move the constant
                    // there and save a copy.
                    if (IsCommutative(node))
                    {
                        Operand swapped = node.GetSource(1);

                        node.SetSource(0, swapped);
                        node.SetSource(1, lhs);

                        lhs = swapped;
                    }

                    // Still a constant on operand 1 (not commutative, or both were constants).
                    if (lhs.Kind == OperandKind.Constant)
                    {
                        node.SetSource(0, AddCopy(nodes, node, lhs));
                    }
                }
            }

            if (node.SourcesCount < 2)
            {
                return;
            }

            Operand rhs = node.GetSource(1);

            if (rhs.Kind != OperandKind.Constant)
            {
                return;
            }

            if (!rhs.Type.IsInteger())
            {
                node.SetSource(1, AddXmmCopy(nodes, node, rhs));
            }
            else if (!HasConstSrc2(inst) || CodeGenCommon.IsLongConst(rhs))
            {
                // Either the instruction has no immediate form for operand 2, or the constant
                // is a long constant that cannot be used as an immediate.
                node.SetSource(1, AddCopy(nodes, node, rhs));
            }
        }

        /// <summary>
        /// Inserts the copies needed by instructions whose operands are tied to specific
        /// physical registers (compare-and-swap, division, 64x64 multiply, variable shifts and
        /// the intrinsics with an implied XMM0 operand).
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="node">Operation to constrain.</param>
        protected static void InsertConstrainedRegCopies(IntrusiveList<Operation> nodes, Operation node)
        {
            switch (node.Instruction)
            {
                case Instruction.CompareAndSwap:
                case Instruction.CompareAndSwap16:
                case Instruction.CompareAndSwap8:
                    ConstrainCompareAndSwap(nodes, node);
                    break;

                case Instruction.Divide:
                case Instruction.DivideUI:
                    ConstrainDivide(nodes, node);
                    break;

                case Instruction.Extended:
                    ConstrainImpliedXmm0(nodes, node);
                    break;

                case Instruction.Multiply64HighSI:
                case Instruction.Multiply64HighUI:
                    ConstrainMultiplyHigh(nodes, node);
                    break;

                case Instruction.RotateRight:
                case Instruction.ShiftLeft:
                case Instruction.ShiftRightSI:
                case Instruction.ShiftRightUI:
                    ConstrainShiftCount(nodes, node);
                    break;
            }
        }

        // Routes compare-and-exchange operands through RAX (and RDX:RAX / RCX:RBX for 16 bytes).
        private static void ConstrainCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
        {
            Operand dest = node.Destination;

            if (node.GetSource(1).Type == OperandType.V128)
            {
                // 16-byte compare and exchange:
                // - the expected value lives in RDX:RAX,
                // - the replacement value lives in RCX:RBX,
                // - the value found in memory is returned in RDX:RAX.
                Operand expectedVec = node.GetSource(1);
                Operand desiredVec = node.GetSource(2);

                Operand rax = Gpr(X86Register.Rax, OperandType.I64);
                Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
                Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
                Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);

                // Split both vectors into their low/high 64-bit halves.
                nodes.AddBefore(node, Operation(Instruction.VectorExtract, rax, expectedVec, Const(0)));
                nodes.AddBefore(node, Operation(Instruction.VectorExtract, rdx, expectedVec, Const(1)));
                nodes.AddBefore(node, Operation(Instruction.VectorExtract, rbx, desiredVec, Const(0)));
                nodes.AddBefore(node, Operation(Instruction.VectorExtract, rcx, desiredVec, Const(1)));

                // Rebuild the vector result from RDX:RAX after the exchange.
                Operation lowInit = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, rax));
                nodes.AddAfter(lowInit, Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));

                node.SetDestinations(new Operand[] { rdx, rax });
                node.SetSources(new Operand[] { node.GetSource(0), rdx, rax, rcx, rbx });
            }
            else
            {
                // 32/64-bit compare and exchange:
                // - the expected value lives in (E/R)AX,
                // - the value found in memory is returned in (E/R)AX.
                Operand expected = node.GetSource(1);
                Operand desired = node.GetSource(2);

                Operand rax = Gpr(X86Register.Rax, expected.Type);

                nodes.AddBefore(node, Operation(Instruction.Copy, rax, expected));

                // The replacement value may be a constant, and the instruction has no
                // immediate form, so it always goes through a fresh local.
                Operand desiredTemp = Local(desired.Type);

                nodes.AddBefore(node, Operation(Instruction.Copy, desiredTemp, desired));

                node.SetSources(new Operand[] { node.GetSource(0), rax, desiredTemp });

                nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax));

                node.Destination = rax;
            }
        }

        // Routes integer division through RDX:RAX; non-integer division is left untouched.
        private static void ConstrainDivide(IntrusiveList<Operation> nodes, Operation node)
        {
            Operand dest = node.Destination;

            if (!dest.Type.IsInteger())
            {
                return;
            }

            // Integer division:
            // - the dividend is always RDX:RAX,
            // - the quotient is written to RAX,
            // - the remainder is written to RDX.
            Operand dividend = node.GetSource(0);

            Operand rax = Gpr(X86Register.Rax, dividend.Type);
            Operand rdx = Gpr(X86Register.Rdx, dividend.Type);

            nodes.AddBefore(node, Operation(Instruction.Copy, rax, dividend));
            nodes.AddBefore(node, Operation(Instruction.Clobber, rdx));

            nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax));

            node.SetSources(new Operand[] { rdx, rax, node.GetSource(1) });
            node.Destination = rax;
        }

        // Moves the third source to XMM0 for intrinsics whose last operand is implicitly XMM0.
        private static void ConstrainImpliedXmm0(IntrusiveList<Operation> nodes, Operation node)
        {
            Intrinsic intrin = node.Intrinsic;

            bool isBlend = intrin == Intrinsic.X86Blendvpd ||
                           intrin == Intrinsic.X86Blendvps ||
                           intrin == Intrinsic.X86Pblendvb;

            // BLENDVPD, BLENDVPS and PBLENDVB imply XMM0 as last operand when VEX is not
            // available; SHA256RNDS2 always implies XMM0 as last operand.
            bool impliesXmm0 = (isBlend && !HardwareCapabilities.SupportsVexEncoding) ||
                               intrin == Intrinsic.X86Sha256Rnds2;

            if (!impliesXmm0)
            {
                return;
            }

            Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);

            nodes.AddBefore(node, Operation(Instruction.Copy, xmm0, node.GetSource(2)));

            node.SetSource(2, xmm0);
        }

        // Routes the i64 * i64 = i128 multiply through RAX (input/low result) and RDX (high result).
        private static void ConstrainMultiplyHigh(IntrusiveList<Operation> nodes, Operation node)
        {
            // - the multiplicand is always RAX,
            // - the low 64 bits of the product are written to RAX,
            // - the high 64 bits of the product are written to RDX.
            Operand dest = node.Destination;
            Operand multiplicand = node.GetSource(0);

            Operand rax = Gpr(X86Register.Rax, multiplicand.Type);
            Operand rdx = Gpr(X86Register.Rdx, multiplicand.Type);

            nodes.AddBefore(node, Operation(Instruction.Copy, rax, multiplicand));

            node.SetSource(0, rax);

            nodes.AddAfter(node, Operation(Instruction.Copy, dest, rdx));

            node.SetDestinations(new Operand[] { rdx, rax });
        }

        // Moves a variable shift/rotate count to RCX, since the count register is always CL.
        private static void ConstrainShiftCount(IntrusiveList<Operation> nodes, Operation node)
        {
            if (node.GetSource(1).Kind != OperandKind.LocalVariable)
            {
                return;
            }

            Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);

            nodes.AddBefore(node, Operation(Instruction.Copy, rcx, node.GetSource(1)));

            node.SetSource(1, rcx);
        }

        /// <summary>
        /// Prepares operations that lower to destructive (destination == first source) X86 forms
        /// by copying the relevant source into the destination beforehand, going through a
        /// temporary when the destination is still read by another source.
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="node">Operation to prepare.</param>
        protected static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node)
        {
            if (node.Destination == default || node.SourcesCount == 0)
            {
                return;
            }

            Instruction inst = node.Instruction;

            Operand dest = node.Destination;
            Operand src1 = node.GetSource(0);

            // Multiply maps to IMUL, which has a non-destructive three operand form when the
            // second source is an immediate.
            bool threeOperandForm = inst == Instruction.Multiply && node.GetSource(1).Kind == OperandKind.Constant;

            if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
            {
                if (IsReadByLaterSource(node, dest))
                {
                    // Writing src1 into dest first would destroy a value another source still needs.
                    ComputeInTemp(nodes, node, dest, src1, 0);
                }
                else
                {
                    SeedDestination(nodes, node, dest, src1, 0);
                }
            }
            else if (inst == Instruction.ConditionalSelect)
            {
                Operand src2 = node.GetSource(1);
                Operand src3 = node.GetSource(2);

                if (src1 == dest || src2 == dest)
                {
                    ComputeInTemp(nodes, node, dest, src3, 2);
                }
                else
                {
                    SeedDestination(nodes, node, dest, src3, 2);
                }
            }
        }

        // True when any source other than the first one is the destination operand itself.
        private static bool IsReadByLaterSource(Operation node, Operand dest)
        {
            for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
            {
                if (node.GetSource(srcIndex) == dest)
                {
                    return true;
                }
            }

            return false;
        }

        // Makes the operation read and write a fresh local, then copies the result to dest.
        private static void ComputeInTemp(
            IntrusiveList<Operation> nodes,
            Operation node,
            Operand dest,
            Operand value,
            int srcIndex)
        {
            Operand temp = Local(dest.Type);

            nodes.AddBefore(node, Operation(Instruction.Copy, temp, value));

            node.SetSource(srcIndex, temp);

            nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));

            node.Destination = temp;
        }

        // Copies the value into dest before the operation and makes the operation read dest.
        private static void SeedDestination(
            IntrusiveList<Operation> nodes,
            Operation node,
            Operand dest,
            Operand value,
            int srcIndex)
        {
            nodes.AddBefore(node, Operation(Instruction.Copy, dest, value));

            node.SetSource(srcIndex, dest);
        }

        // Expands an unsigned integer to FP conversion, which X86 lacks, into signed
        // conversions plus a correction, and removes the original operation.
        private static void GenerateConvertToFPUI(IntrusiveList<Operation> nodes, Operation node)
        {
            Operand dest = node.Destination;
            Operand source = node.GetSource(0);

            Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");

            Operation tail = node;

            // Appends right after the last operation emitted so far.
            void Emit(Operation op)
            {
                tail = nodes.AddAfter(tail, op);
            }

            if (source.Type == OperandType.I32)
            {
                // A zero-extended 32-bit value is never negative as a 64-bit signed integer,
                // so the 64-bit signed conversion gives the right answer directly.
                Operand zex = Local(OperandType.I64);

                Emit(Operation(Instruction.ZeroExtend32, zex, source));
                Emit(Operation(Instruction.ConvertToFP, dest, zex));
            }
            else
            {
                // 64-bit source:
                // - Shift right by one so the most significant bit is clear (a division by 2
                //   that loses the least significant bit).
                // - Convert the halved value with the signed instruction.
                // - Double the result by adding it to itself.
                // - Convert the bit that was shifted out and add it back.
                Operand lsb = Local(OperandType.I64);
                Operand half = Local(OperandType.I64);

                Operand lsbF = Local(dest.Type);

                Emit(Operation(Instruction.Copy, lsb, source));
                Emit(Operation(Instruction.Copy, half, source));

                Emit(Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L)));
                Emit(Operation(Instruction.ShiftRightUI, half, half, Const(1)));

                Emit(Operation(Instruction.ConvertToFP, lsbF, lsb));
                Emit(Operation(Instruction.ConvertToFP, dest, half));

                Emit(Operation(Instruction.Add, dest, dest, dest));
                Emit(Operation(Instruction.Add, dest, dest, lsbF));
            }

            Delete(nodes, node);
        }

        // Expands an FP negate into a XOR with a sign-bit mask (SSE has no FP negate), which
        // also yields -0 when negating 0, and removes the original operation.
        private static void GenerateNegate(IntrusiveList<Operation> nodes, Operation node)
        {
            Operand dest = node.Destination;
            Operand source = node.GetSource(0);

            Debug.Assert(dest.Type == OperandType.FP32 ||
                         dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");

            bool isSingle = dest.Type == OperandType.FP32;

            // Shifting all-ones left by (element size - 1) leaves only the sign bit of each element.
            Intrinsic shiftLeft = isSingle ? Intrinsic.X86Pslld : Intrinsic.X86Psllq;
            int signBit = isSingle ? 31 : 63;

            Operand mask = Local(dest.Type);

            Operation tail = nodes.AddAfter(node, Operation(Instruction.VectorOne, mask));

            tail = nodes.AddAfter(tail, Operation(shiftLeft, mask, mask, Const(signBit)));
            tail = nodes.AddAfter(tail, Operation(Intrinsic.X86Xorps, mask, mask, source));

            nodes.AddAfter(tail, Operation(Instruction.Copy, dest, mask));

            Delete(nodes, node);
        }

        // Expands a byte insertion into a 16-bit extract / merge / insert sequence for
        // hardware without SSE 4.1, and removes the original operation.
        private static void GenerateVectorInsert8(IntrusiveList<Operation> nodes, Operation node)
        {
            Operand dest = node.Destination;
            Operand vector = node.GetSource(0);
            Operand value = node.GetSource(1);
            Operand indexOp = node.GetSource(2);

            Debug.Assert(indexOp.Kind == OperandKind.Constant);

            byte index = indexOp.AsByte();

            Debug.Assert(index < 16);

            Operation tail = node;

            void Emit(Operation op)
            {
                tail = nodes.AddAfter(tail, op);
            }

            Operand word = Local(OperandType.I32);
            Operand newByte = Local(OperandType.I32);

            Emit(Operation(Instruction.Copy, newByte, value));

            // Fetch the 16-bit element that contains the target byte.
            Emit(Operation(Instruction.VectorExtract16, word, vector, Const(index >> 1)));

            if ((index & 1) != 0)
            {
                // Odd index: keep the low byte of the element, place the new byte above it.
                Emit(Operation(Instruction.ZeroExtend8, word, word));
                Emit(Operation(Instruction.ShiftLeft, newByte, newByte, Const(8)));
                Emit(Operation(Instruction.BitwiseOr, word, word, newByte));
            }
            else
            {
                // Even index: keep the high byte of the element, place the new byte below it.
                Emit(Operation(Instruction.ZeroExtend8, newByte, newByte));
                Emit(Operation(Instruction.BitwiseAnd, word, word, Const(0xff00)));
                Emit(Operation(Instruction.BitwiseOr, word, word, newByte));
            }

            Emit(Operation(Instruction.VectorInsert16, dest, vector, word, Const(index >> 1)));

            Delete(nodes, node);
        }

        /// <summary>
        /// Materializes a non-integer constant: its integer form is copied to a local, and
        /// a scalar vector is created from that local, both before <paramref name="node"/>.
        /// </summary>
        /// <returns>The new local holding the value, typed like <paramref name="source"/>.</returns>
        protected static Operand AddXmmCopy(IntrusiveList<Operation> nodes, Operation node, Operand source)
        {
            Operand result = Local(source.Type);
            Operand bits = AddCopy(nodes, node, GetIntConst(source));

            nodes.AddBefore(node, Operation(Instruction.VectorCreateScalar, result, bits));

            return result;
        }

        /// <summary>
        /// Inserts, before <paramref name="node"/>, a copy of <paramref name="source"/> into a new local.
        /// </summary>
        /// <returns>The new local, typed like <paramref name="source"/>.</returns>
        protected static Operand AddCopy(IntrusiveList<Operation> nodes, Operation node, Operand source)
        {
            Operand result = Local(source.Type);

            nodes.AddBefore(node, Operation(Instruction.Copy, result, source));

            return result;
        }

        // Returns an integer constant built from an FP32/FP64 constant (via AsInt32/AsInt64);
        // any other operand is returned unchanged.
        private static Operand GetIntConst(Operand value)
        {
            switch (value.Type)
            {
                case OperandType.FP32:
                    return Const(value.AsInt32());

                case OperandType.FP64:
                    return Const(value.AsInt64());

                default:
                    return value;
            }
        }

        /// <summary>
        /// Clears the destination and every source of <paramref name="node"/>, then unlinks it
        /// from <paramref name="nodes"/>.
        /// </summary>
        protected static void Delete(IntrusiveList<Operation> nodes, Operation node)
        {
            node.Destination = default;

            int index = 0;

            while (index < node.SourcesCount)
            {
                node.SetSource(index, default);

                index++;
            }

            nodes.Remove(node);
        }

        /// <summary>Creates an integer register operand for <paramref name="register"/>.</summary>
        protected static Operand Gpr(X86Register register, OperandType type)
            => Register((int)register, RegisterType.Integer, type);

        /// <summary>Creates a vector register operand for <paramref name="register"/>.</summary>
        protected static Operand Xmm(X86Register register, OperandType type)
            => Register((int)register, RegisterType.Vector, type);

        // Tells whether the operation lowers to a form where the destination must also be
        // the first source.
        private static bool IsSameOperandDestSrc1(Operation operation)
        {
            switch (operation.Instruction)
            {
                // Destructive only for non-integer operands without VEX.
                case Instruction.Add:
                case Instruction.Divide:
                    return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();

                // Destructive for integers always, and for everything else without VEX.
                case Instruction.Multiply:
                case Instruction.Subtract:
                    return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger();

                // Always destructive.
                case Instruction.BitwiseAnd:
                case Instruction.BitwiseExclusiveOr:
                case Instruction.BitwiseNot:
                case Instruction.BitwiseOr:
                case Instruction.ByteSwap:
                case Instruction.Negate:
                case Instruction.RotateRight:
                case Instruction.ShiftLeft:
                case Instruction.ShiftRightSI:
                case Instruction.ShiftRightUI:
                    return true;

                // Destructive without VEX.
                case Instruction.VectorInsert:
                case Instruction.VectorInsert16:
                case Instruction.VectorInsert8:
                    return !HardwareCapabilities.SupportsVexEncoding;

                case Instruction.Extended:
                    return IsIntrinsicSameOperandDestSrc1(operation);

                default:
                    return IsVexSameOperandDestSrc1(operation);
            }
        }

        // CRC32 and FMA intrinsics are always destructive; the rest depend on VEX availability.
        private static bool IsIntrinsicSameOperandDestSrc1(Operation operation)
        {
            IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);

            if (info.Type == IntrinsicType.Crc32 || info.Type == IntrinsicType.Fma)
            {
                return true;
            }

            return IsVexSameOperandDestSrc1(operation);
        }

        // An intrinsic with two or more sources and a V128 destination is destructive when it
        // cannot be encoded with a VEX prefix.
        private static bool IsVexSameOperandDestSrc1(Operation operation)
        {
            if (!IsIntrinsic(operation.Instruction))
            {
                return false;
            }

            IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);

            bool lacksVex = !(HardwareCapabilities.SupportsVexEncoding && Assembler.SupportsVexPrefix(info.Inst));

            bool hasSeveralSources = operation.SourcesCount >= 2;

            bool writesVector = operation.Destination != default && operation.Destination.Type == OperandType.V128;

            return lacksVex && hasSeveralSources && writesVector;
        }

        // Instructions that accept a constant as their first source.
        private static bool HasConstSrc1(Instruction inst)
        {
            return inst == Instruction.Copy ||
                   inst == Instruction.LoadArgument ||
                   inst == Instruction.Spill ||
                   inst == Instruction.SpillArg;
        }

        // Instructions that accept a constant as their second source.
        private static bool HasConstSrc2(Instruction inst)
        {
            switch (inst)
            {
                case Instruction.Add:
                case Instruction.BitwiseAnd:
                case Instruction.BitwiseExclusiveOr:
                case Instruction.BitwiseOr:
                case Instruction.BranchIf:
                case Instruction.Compare:
                case Instruction.Multiply:
                case Instruction.RotateRight:
                case Instruction.ShiftLeft:
                case Instruction.ShiftRightSI:
                case Instruction.ShiftRightUI:
                case Instruction.Store:
                case Instruction.Store16:
                case Instruction.Store8:
                case Instruction.Subtract:
                case Instruction.VectorExtract:
                case Instruction.VectorExtract16:
                case Instruction.VectorExtract8:
                    return true;

                default:
                    return false;
            }
        }

        // Tells whether the first two sources can be exchanged without changing the result.
        private static bool IsCommutative(Operation operation)
        {
            switch (operation.Instruction)
            {
                case Instruction.Add:
                case Instruction.BitwiseAnd:
                case Instruction.BitwiseExclusiveOr:
                case Instruction.BitwiseOr:
                case Instruction.Multiply:
                    return true;

                case Instruction.BranchIf:
                case Instruction.Compare:
                {
                    // Comparisons commute only for equality and inequality.
                    Operand comp = operation.GetSource(2);

                    Debug.Assert(comp.Kind == OperandKind.Constant);

                    Comparison kind = (Comparison)comp.AsInt32();

                    return kind == Comparison.Equal || kind == Comparison.NotEqual;
                }

                default:
                    return false;
            }
        }

        private static bool IsIntrinsic(Instruction inst) => inst == Instruction.Extended;

        // Every intrinsic except the CRC32 ones is treated as an XMM intrinsic.
        private static bool IsXmmIntrinsic(Operation operation)
        {
            return operation.Instruction == Instruction.Extended &&
                   IntrinsicTable.GetInfo(operation.Intrinsic).Type != IntrinsicType.Crc32;
        }
    }
}
// src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Pre-allocation helpers that place call arguments, incoming arguments and return values
    /// in the registers selected through <see cref="CallingConvention"/>, for the non-Windows
    /// (System V) calling convention.
    /// </summary>
    class PreAllocatorSystemV : PreAllocator
    {
        /// <summary>
        /// Inserts, around a call, the copies that move each argument to its argument register
        /// (or spill slot) and the result out of the return register(s).
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="node">The call; source 0 is the target, the remaining sources are the arguments.</param>
        public static void InsertCallCopies(IntrusiveList<Operation> nodes, Operation node)
        {
            Operand dest = node.Destination;

            List<Operand> sources = new List<Operand>
            {
                node.GetSource(0)
            };

            int argsCount = node.SourcesCount - 1;

            int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
            int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();

            int intCount = 0;
            int vecCount = 0;

            int stackOffset = 0;

            for (int index = 0; index < argsCount; index++)
            {
                Operand source = node.GetSource(index + 1);

                bool passOnReg;

                if (source.Type.IsInteger())
                {
                    passOnReg = intCount < intMax;
                }
                else if (source.Type == OperandType.V128)
                {
                    // A V128 needs two consecutive integer registers.
                    passOnReg = intCount + 1 < intMax;
                }
                else
                {
                    passOnReg = vecCount < vecMax;
                }

                if (source.Type == OperandType.V128 && passOnReg)
                {
                    // V128 is a struct, we pass each half on a GPR if possible.
                    // NOTE(review): unlike the scalar path below, these two registers are not
                    // appended to `sources` - confirm that this is intended.
                    Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
                    Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);

                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));

                    continue;
                }

                if (passOnReg)
                {
                    Operand argReg = source.Type.IsInteger()
                        ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);

                    Operation copyOp = Operation(Instruction.Copy, argReg, source);

                    // The copy itself may have a constant source that needs materializing.
                    InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));

                    sources.Add(argReg);
                }
                else
                {
                    // No register left for this argument: store it at the next spill offset.
                    Operand offset = Const(stackOffset);

                    Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);

                    InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp));

                    stackOffset += source.Type.GetSizeInBytes();
                }
            }

            node.SetSources(sources.ToArray());

            if (dest != default)
            {
                if (dest.Type == OperandType.V128)
                {
                    // A V128 result comes back split across the low and high integer return registers.
                    Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
                    Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);

                    Operation operation = node;

                    node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
                    nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));

                    operation.Destination = default;
                }
                else
                {
                    Operand retReg = dest.Type.IsInteger()
                        ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
                        : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);

                    Operation copyOp = Operation(Instruction.Copy, dest, retReg);

                    nodes.AddAfter(node, copyOp);

                    node.Destination = retReg;
                }
            }
        }

        /// <summary>
        /// Inserts, before a tail call, the copies that move each argument to its argument
        /// register and the target address to the integer return register.
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="stackAlloc">Unused by this implementation.</param>
        /// <param name="node">The tail call; source 0 is the target, the remaining sources are the arguments.</param>
        /// <exception cref="NotImplementedException">
        /// An argument does not fit in the argument registers (stack passing is not supported for tail calls).
        /// </exception>
        public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
        {
            List<Operand> sources = new List<Operand>
            {
                node.GetSource(0)
            };

            int argsCount = node.SourcesCount - 1;

            int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
            int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();

            int intCount = 0;
            int vecCount = 0;

            // Handle arguments passed on registers.
            for (int index = 0; index < argsCount; index++)
            {
                Operand source = node.GetSource(1 + index);

                bool passOnReg;

                if (source.Type.IsInteger())
                {
                    // NOTE(review): this leaves the last integer argument register unused, unlike
                    // InsertCallCopies which checks `intCount < intMax`. Kept as is because the
                    // stricter check can only reject a call, never mis-assign a register - confirm
                    // whether the reservation is intended.
                    passOnReg = intCount + 1 < intMax;
                }
                else if (source.Type == OperandType.V128)
                {
                    // A V128 is passed on two integer registers, so it must be gated on the
                    // integer register count. Gating it on the vector register count (as was
                    // done before) could request an argument register past the available ones.
                    passOnReg = intCount + 1 < intMax;
                }
                else
                {
                    passOnReg = vecCount < vecMax;
                }

                if (source.Type == OperandType.V128 && passOnReg)
                {
                    // V128 is a struct, we pass each half on a GPR if possible.
                    // NOTE(review): unlike the scalar path below, these two registers are not
                    // appended to `sources` - confirm that this is intended.
                    Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
                    Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);

                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
                    nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));

                    continue;
                }

                if (passOnReg)
                {
                    Operand argReg = source.Type.IsInteger()
                        ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
                        : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);

                    Operation copyOp = Operation(Instruction.Copy, argReg, source);

                    InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));

                    sources.Add(argReg);
                }
                else
                {
                    throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
                }
            }

            // The target address must be on the return registers, since we
            // don't return anything and it is guaranteed to not be a
            // callee saved register (which would be trashed on the epilogue).
            Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);

            Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0));

            nodes.AddBefore(node, addrCopyOp);

            sources[0] = retReg;

            node.SetSources(sources.ToArray());
        }

        /// <summary>
        /// Replaces a <c>LoadArgument</c> operation with a use of a local that is initialized
        /// from the argument register(s) at the very start of the entry block.
        /// </summary>
        /// <param name="cctx">Compiler context; provides the argument types and the entry block.</param>
        /// <param name="buffer">Scratch buffer forwarded to <c>PreAllocatorCommon.Propagate</c>.</param>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="preservedArgs">Per-argument cache of the locals that already hold an argument value.</param>
        /// <param name="node">The <c>LoadArgument</c> operation; source 0 is the constant argument index.</param>
        /// <returns>The operation the caller's pass should process next.</returns>
        public static Operation InsertLoadArgumentCopy(
            CompilerContext cctx,
            ref Span<Operation> buffer,
            IntrusiveList<Operation> nodes,
            Operand[] preservedArgs,
            Operation node)
        {
            Operand source = node.GetSource(0);

            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");

            int index = source.AsInt32();

            // Count the registers consumed by the arguments that precede this one.
            int intCount = 0;
            int vecCount = 0;

            for (int cIndex = 0; cIndex < index; cIndex++)
            {
                OperandType argType = cctx.FuncArgTypes[cIndex];

                if (argType.IsInteger())
                {
                    intCount++;
                }
                else if (argType == OperandType.V128)
                {
                    intCount += 2;
                }
                else
                {
                    vecCount++;
                }
            }

            // The register class depends on the type of the argument being loaded, which is the
            // destination type. `source` is only the constant argument index, so its type says
            // nothing about the argument; it was used here before, which classified FP and V128
            // arguments as integers.
            OperandType loadedType = node.Destination.Type;

            bool passOnReg;

            if (loadedType.IsInteger())
            {
                passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
            }
            else if (loadedType == OperandType.V128)
            {
                passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
            }
            else
            {
                passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
            }

            if (passOnReg)
            {
                Operand dest = node.Destination;

                if (preservedArgs[index] == default)
                {
                    if (dest.Type == OperandType.V128)
                    {
                        // V128 is a struct, we pass each half on a GPR if possible.
                        Operand pArg = Local(OperandType.V128);

                        Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
                        Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);

                        Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
                        Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));

                        // AddFirst prepends, so the high half is added first to end up after the low half.
                        cctx.Cfg.Entry.Operations.AddFirst(copyH);
                        cctx.Cfg.Entry.Operations.AddFirst(copyL);

                        preservedArgs[index] = pArg;
                    }
                    else
                    {
                        Operand pArg = Local(dest.Type);

                        Operand argReg = dest.Type.IsInteger()
                            ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
                            : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);

                        Operation copyOp = Operation(Instruction.Copy, pArg, argReg);

                        cctx.Cfg.Entry.Operations.AddFirst(copyOp);

                        preservedArgs[index] = pArg;
                    }
                }

                Operation nextNode;

                if (dest.AssignmentsCount == 1)
                {
                    // Let's propagate the argument if we can to avoid copies.
                    PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
                    nextNode = node.ListNext;
                }
                else
                {
                    Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
                    nextNode = nodes.AddBefore(node, argCopyOp);
                }

                Delete(nodes, node);
                return nextNode;
            }
            else
            {
                // TODO: Pass on stack.
                // The operation is left in place. The successor must be returned here: the
                // caller's pass continues from the returned operation, so returning `node`
                // itself would make it process this same operation forever.
                return node.ListNext;
            }
        }

        /// <summary>
        /// Inserts, before a return, the copy (or pair of extracts for V128) that moves the
        /// returned value to the return register(s). Does nothing for a return without a value.
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="node">The return operation.</param>
        public static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node)
        {
            if (node.SourcesCount == 0)
            {
                return;
            }

            Operand source = node.GetSource(0);

            if (source.Type == OperandType.V128)
            {
                // A V128 is returned split across the low and high integer return registers.
                Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
                Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);

                nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
                nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
            }
            else
            {
                Operand retReg = source.Type.IsInteger()
                    ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
                    : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);

                Operation retCopyOp = Operation(Instruction.Copy, retReg, source);

                nodes.AddBefore(node, retCopyOp);
            }
        }
    }
}
// Origin (from the enclosing diff): new file src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs, blob 45319e6a, 327 lines.
using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System;
using System.Diagnostics;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Rewrites call, tail call, argument load and return operations so that every value sits in the
    /// register or stack location dictated by <see cref="CallingConvention"/>. In this variant a V128
    /// value is never passed or returned in a register: it travels through memory and only its
    /// address is handed over.
    /// </summary>
    class PreAllocatorWindows : PreAllocator
    {
        /// <summary>
        /// Expands a call operation into the argument moves, stack spills and result copies it needs.
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="stackAlloc">Allocator used for the scratch memory holding V128 values.</param>
        /// <param name="node">The call operation; source 0 is the call target, the rest are arguments.</param>
        public static void InsertCallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
        {
            Operand callResult = node.Destination;

            // Number of scratch bytes already handed out for this particular call.
            int scratchUsed = 0;

            int ReserveScratch(int byteCount)
            {
                // The stack allocator is assumed to start out empty (TotalSize = 0). Each call tracks
                // its own usage from zero, which lets different calls share the same scratch region;
                // the region is only grown when this call needs more than what is already there.
                int start = scratchUsed;
                int end = start + byteCount;

                if (end > stackAlloc.TotalSize)
                {
                    stackAlloc.Allocate(end - stackAlloc.TotalSize);
                }

                scratchUsed = end;

                return start;
            }

            // A V128 result is written by the callee to memory whose address is passed
            // in the first integer argument register, ahead of the regular arguments.
            Operand hiddenRetPtr = default;
            int hiddenArgs = 0;

            if (callResult != default && callResult.Type == OperandType.V128)
            {
                int resultOffset = ReserveScratch(callResult.Type.GetSizeInBytes());

                hiddenRetPtr = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);

                nodes.AddBefore(node, Operation(Instruction.StackAlloc, hiddenRetPtr, Const(resultOffset)));

                hiddenArgs = 1;
            }

            // Only the leading arguments fit on registers; the hidden pointer uses one of them up.
            int explicitArgs = node.SourcesCount - 1;
            int regArgs = Math.Min(explicitArgs, CallingConvention.GetArgumentsOnRegsCount() - hiddenArgs);

            Operand[] rewritten = new Operand[1 + hiddenArgs + regArgs];

            rewritten[0] = node.GetSource(0);

            if (hiddenRetPtr != default)
            {
                rewritten[1] = hiddenRetPtr;
            }

            // Every V128 argument is stored to scratch memory and replaced by its address.
            for (int slot = 1; slot < node.SourcesCount; slot++)
            {
                Operand arg = node.GetSource(slot);

                if (arg.Type != OperandType.V128)
                {
                    continue;
                }

                Operand argAddr = Local(OperandType.I64);

                int argOffset = ReserveScratch(arg.Type.GetSizeInBytes());

                nodes.AddBefore(node, Operation(Instruction.StackAlloc, argAddr, Const(argOffset)));

                Operation vectorStore = Operation(Instruction.Store, default, argAddr, arg);

                InsertConstantRegCopies(nodes, nodes.AddBefore(node, vectorStore));

                node.SetSource(slot, argAddr);
            }

            // Arguments that go on registers. The register index is positional: it
            // is shared between the integer and the vector register files.
            for (int i = 0; i < regArgs; i++)
            {
                Operand arg = node.GetSource(i + 1);

                int regIndex = i + hiddenArgs;

                Operand target = arg.Type.IsInteger()
                    ? Gpr(CallingConvention.GetIntArgumentRegister(regIndex), arg.Type)
                    : Xmm(CallingConvention.GetVecArgumentRegister(regIndex), arg.Type);

                Operation move = Operation(Instruction.Copy, target, arg);

                InsertConstantRegCopies(nodes, nodes.AddBefore(node, move));

                rewritten[1 + hiddenArgs + i] = target;
            }

            // Whatever did not fit on registers is written to the stack with "SpillArg",
            // one 8-byte slot per argument position.
            for (int i = regArgs; i < node.SourcesCount - 1; i++)
            {
                Operand arg = node.GetSource(i + 1);
                Operand slotOffset = Const((i + hiddenArgs) * 8);

                Operation spill = Operation(Instruction.SpillArg, default, slotOffset, arg);

                InsertConstantRegCopies(nodes, nodes.AddBefore(node, spill));
            }

            if (callResult != default)
            {
                if (callResult.Type == OperandType.V128)
                {
                    // Keep a copy of the result address taken before the call,
                    // then read the vector back from it once the call is done.
                    Operand resultAddr = Local(OperandType.I64);

                    nodes.AddBefore(node, Operation(Instruction.Copy, resultAddr, hiddenRetPtr));

                    nodes.AddAfter(node, Operation(Instruction.Load, callResult, resultAddr));

                    node.Destination = default;
                }
                else
                {
                    Operand resultReg;

                    if (callResult.Type.IsInteger())
                    {
                        resultReg = Gpr(CallingConvention.GetIntReturnRegister(), callResult.Type);
                    }
                    else
                    {
                        resultReg = Xmm(CallingConvention.GetVecReturnRegister(), callResult.Type);
                    }

                    nodes.AddAfter(node, Operation(Instruction.Copy, callResult, resultReg));

                    node.Destination = resultReg;
                }
            }

            node.SetSources(rewritten);
        }

        /// <summary>
        /// Moves the arguments of a tail call into argument registers and the call target into the
        /// integer return register.
        /// </summary>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="stackAlloc">Not used by this method.</param>
        /// <param name="node">The tail call operation; source 0 is the target, the rest are arguments.</param>
        /// <exception cref="NotImplementedException">More arguments than fit on registers.</exception>
        public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
        {
            int argCount = node.SourcesCount - 1;

            if (argCount > CallingConvention.GetArgumentsOnRegsCount())
            {
                throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
            }

            Operand[] rewritten = new Operand[1 + argCount];

            // Handle arguments passed on registers.
            for (int i = 0; i < argCount; i++)
            {
                Operand arg = node.GetSource(1 + i);
                Operand target;

                if (arg.Type.IsInteger())
                {
                    target = Gpr(CallingConvention.GetIntArgumentRegister(i), arg.Type);
                }
                else
                {
                    target = Xmm(CallingConvention.GetVecArgumentRegister(i), arg.Type);
                }

                Operation move = Operation(Instruction.Copy, target, arg);

                InsertConstantRegCopies(nodes, nodes.AddBefore(node, move));

                rewritten[1 + i] = target;
            }

            // The target address must be on the return registers, since we
            // don't return anything and it is guaranteed to not be a
            // callee saved register (which would be trashed on the epilogue).
            Operand targetAddr = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);

            nodes.AddBefore(node, Operation(Instruction.Copy, targetAddr, node.GetSource(0)));

            rewritten[0] = targetAddr;

            node.SetSources(rewritten);
        }

        /// <summary>
        /// Replaces a LoadArgument operation with a read of the register the argument arrives in.
        /// </summary>
        /// <remarks>
        /// The incoming register is saved to a local at the start of the entry block the first time a
        /// position is seen, and cached in <paramref name="preservedArgs"/>. For a V128 argument the
        /// register carries an address, so the value is obtained with a load from the saved address.
        /// </remarks>
        /// <param name="cctx">Compiler context; supplies the function return type and the entry block.</param>
        /// <param name="buffer">Scratch buffer forwarded to <c>PreAllocatorCommon.Propagate</c>.</param>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="preservedArgs">Per-position cache of the locals holding the incoming registers.</param>
        /// <param name="node">The LoadArgument operation; its first source must be the constant argument index.</param>
        /// <returns>
        /// The operation processing should continue from. When the argument is not passed on a register
        /// the node is left untouched and returned as is.
        /// </returns>
        public static Operation InsertLoadArgumentCopy(
            CompilerContext cctx,
            ref Span<Operation> buffer,
            IntrusiveList<Operation> nodes,
            Operand[] preservedArgs,
            Operation node)
        {
            Operand source = node.GetSource(0);

            Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");

            // A V128 return value takes the first argument position for its hidden result pointer.
            int hiddenArgs = cctx.FuncReturnType == OperandType.V128 ? 1 : 0;

            int slot = source.AsInt32() + hiddenArgs;

            if (slot >= CallingConvention.GetArgumentsOnRegsCount())
            {
                // TODO: Pass on stack.
                return node;
            }

            Operand dest = node.Destination;

            bool byReference = dest.Type == OperandType.V128;

            if (preservedArgs[slot] == default)
            {
                Operand incoming;
                Operand saved;

                if (dest.Type.IsInteger())
                {
                    incoming = Gpr(CallingConvention.GetIntArgumentRegister(slot), dest.Type);
                    saved = Local(dest.Type);
                }
                else if (byReference)
                {
                    // What arrives is the 64-bit address of the vector, not the vector.
                    incoming = Gpr(CallingConvention.GetIntArgumentRegister(slot), OperandType.I64);
                    saved = Local(OperandType.I64);
                }
                else
                {
                    incoming = Xmm(CallingConvention.GetVecArgumentRegister(slot), dest.Type);
                    saved = Local(dest.Type);
                }

                cctx.Cfg.Entry.Operations.AddFirst(Operation(Instruction.Copy, saved, incoming));

                preservedArgs[slot] = saved;
            }

            Operation nextNode;

            if (byReference || dest.AssignmentsCount != 1)
            {
                Instruction readInst = byReference ? Instruction.Load : Instruction.Copy;

                nextNode = nodes.AddBefore(node, Operation(readInst, dest, preservedArgs[slot]));
            }
            else
            {
                // Let's propagate the argument if we can to avoid copies.
                PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[slot]);
                nextNode = node.ListNext;
            }

            Delete(nodes, node);
            return nextNode;
        }

        /// <summary>
        /// Inserts, right before a return operation, the copy (or store, for V128) that delivers the
        /// returned value, then strips the sources from the return operation. Does nothing when the
        /// operation has no source.
        /// </summary>
        /// <param name="cctx">Compiler context; supplies the entry block.</param>
        /// <param name="nodes">Operation list that contains <paramref name="node"/>.</param>
        /// <param name="preservedArgs">Per-position cache of the locals holding the incoming registers.</param>
        /// <param name="node">The return operation.</param>
        public static void InsertReturnCopy(
            CompilerContext cctx,
            IntrusiveList<Operation> nodes,
            Operand[] preservedArgs,
            Operation node)
        {
            if (node.SourcesCount == 0)
            {
                return;
            }

            Operand value = node.GetSource(0);
            Operand target;

            if (value.Type.IsInteger())
            {
                target = Gpr(CallingConvention.GetIntReturnRegister(), value.Type);
            }
            else if (value.Type == OperandType.V128)
            {
                // The result goes to the address received in the first integer argument
                // register; save that register at function entry if nobody did yet.
                if (preservedArgs[0] == default)
                {
                    Operand savedPtr = Local(OperandType.I64);
                    Operand firstArgReg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);

                    cctx.Cfg.Entry.Operations.AddFirst(Operation(Instruction.Copy, savedPtr, firstArgReg));

                    preservedArgs[0] = savedPtr;
                }

                target = preservedArgs[0];
            }
            else
            {
                target = Xmm(CallingConvention.GetVecReturnRegister(), value.Type);
            }

            Operation deliver = value.Type == OperandType.V128
                ? Operation(Instruction.Store, default, target, value)
                : Operation(Instruction.Copy, target, value);

            nodes.AddBefore(node, deliver);

            node.SetSources(Array.Empty<Operand>());
        }
    }
}
// Origin (from the enclosing diff): new file src/ARMeilleure/CodeGen/X86/X86Condition.cs, blob c82cbdec, 47 lines.
using ARMeilleure.IntermediateRepresentation;
using System;

namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Condition tested by the conditional x86 instructions.
    /// </summary>
    /// <remarks>
    /// NOTE(review): the numeric values look like the 4-bit condition code field of the
    /// Jcc/SETcc/CMOVcc encodings, so they should not be renumbered - confirm against the assembler.
    /// </remarks>
    enum X86Condition
    {
        Overflow = 0x0,
        NotOverflow = 0x1,
        Below = 0x2,
        AboveOrEqual = 0x3,
        Equal = 0x4,
        NotEqual = 0x5,
        BelowOrEqual = 0x6,
        Above = 0x7,
        Sign = 0x8,
        NotSign = 0x9,
        ParityEven = 0xa,
        ParityOdd = 0xb,
        Less = 0xc,
        GreaterOrEqual = 0xd,
        LessOrEqual = 0xe,
        Greater = 0xf
    }

    static class ComparisonX86Extensions
    {
        /// <summary>
        /// Maps an IR comparison to the matching x86 condition. The "UI" comparisons map to the
        /// Above/Below family, the others to the Greater/Less family.
        /// </summary>
        /// <param name="comp">Comparison to translate.</param>
        /// <returns>The equivalent <see cref="X86Condition"/>.</returns>
        /// <exception cref="ArgumentException"><paramref name="comp"/> is not one of the handled values.</exception>
        public static X86Condition ToX86Condition(this Comparison comp)
        {
            switch (comp)
            {
                case Comparison.Equal:
                    return X86Condition.Equal;
                case Comparison.NotEqual:
                    return X86Condition.NotEqual;

                case Comparison.Greater:
                    return X86Condition.Greater;
                case Comparison.GreaterOrEqual:
                    return X86Condition.GreaterOrEqual;
                case Comparison.Less:
                    return X86Condition.Less;
                case Comparison.LessOrEqual:
                    return X86Condition.LessOrEqual;

                case Comparison.GreaterUI:
                    return X86Condition.Above;
                case Comparison.GreaterOrEqualUI:
                    return X86Condition.AboveOrEqual;
                case Comparison.LessUI:
                    return X86Condition.Below;
                case Comparison.LessOrEqualUI:
                    return X86Condition.BelowOrEqual;

                default:
                    throw new ArgumentException(null, nameof(comp));
            }
        }
    }
}
// Origin (from the enclosing diff): new file src/ARMeilleure/CodeGen/X86/X86Instruction.cs, blob 9a85c516, 231 lines.
namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Identifiers for the x86 instructions known to this code generator.
    /// </summary>
    /// <remarks>
    /// Members are numbered implicitly, starting with <see cref="None"/> at 0, so the order of the
    /// list defines each value: insert or reorder members only with that in mind.
    /// <see cref="Count"/> comes last and therefore equals the number of members before it.
    /// NOTE(review): presumably used to size or index per-instruction tables - confirm against the users.
    /// </remarks>
    enum X86Instruction
    {
        None,
        Add,
        Addpd,
        Addps,
        Addsd,
        Addss,
        Aesdec,
        Aesdeclast,
        Aesenc,
        Aesenclast,
        Aesimc,
        And,
        Andnpd,
        Andnps,
        Andpd,
        Andps,
        Blendvpd,
        Blendvps,
        Bsr,
        Bswap,
        Call,
        Cmovcc,
        Cmp,
        Cmppd,
        Cmpps,
        Cmpsd,
        Cmpss,
        Cmpxchg,
        Cmpxchg16b,
        Cmpxchg8,
        Comisd,
        Comiss,
        Crc32,
        Crc32_16,
        Crc32_8,
        Cvtdq2pd,
        Cvtdq2ps,
        Cvtpd2dq,
        Cvtpd2ps,
        Cvtps2dq,
        Cvtps2pd,
        Cvtsd2si,
        Cvtsd2ss,
        Cvtsi2sd,
        Cvtsi2ss,
        Cvtss2sd,
        Cvtss2si,
        Div,
        Divpd,
        Divps,
        Divsd,
        Divss,
        Gf2p8affineqb,
        Haddpd,
        Haddps,
        Idiv,
        Imul,
        Imul128,
        Insertps,
        Jmp,
        Ldmxcsr,
        Lea,
        Maxpd,
        Maxps,
        Maxsd,
        Maxss,
        Minpd,
        Minps,
        Minsd,
        Minss,
        Mov,
        Mov16,
        Mov8,
        Movd,
        Movdqu,
        Movhlps,
        Movlhps,
        Movq,
        Movsd,
        Movss,
        Movsx16,
        Movsx32,
        Movsx8,
        Movzx16,
        Movzx8,
        Mul128,
        Mulpd,
        Mulps,
        Mulsd,
        Mulss,
        Neg,
        Not,
        Or,
        Paddb,
        Paddd,
        Paddq,
        Paddw,
        Palignr,
        Pand,
        Pandn,
        Pavgb,
        Pavgw,
        Pblendvb,
        Pclmulqdq,
        Pcmpeqb,
        Pcmpeqd,
        Pcmpeqq,
        Pcmpeqw,
        Pcmpgtb,
        Pcmpgtd,
        Pcmpgtq,
        Pcmpgtw,
        Pextrb,
        Pextrd,
        Pextrq,
        Pextrw,
        Pinsrb,
        Pinsrd,
        Pinsrq,
        Pinsrw,
        Pmaxsb,
        Pmaxsd,
        Pmaxsw,
        Pmaxub,
        Pmaxud,
        Pmaxuw,
        Pminsb,
        Pminsd,
        Pminsw,
        Pminub,
        Pminud,
        Pminuw,
        Pmovsxbw,
        Pmovsxdq,
        Pmovsxwd,
        Pmovzxbw,
        Pmovzxdq,
        Pmovzxwd,
        Pmulld,
        Pmullw,
        Pop,
        Popcnt,
        Por,
        Pshufb,
        Pshufd,
        Pslld,
        Pslldq,
        Psllq,
        Psllw,
        Psrad,
        Psraw,
        Psrld,
        // Psrlq sits before Psrldq on purpose: swapping them would change both values.
        Psrlq,
        Psrldq,
        Psrlw,
        Psubb,
        Psubd,
        Psubq,
        Psubw,
        Punpckhbw,
        Punpckhdq,
        Punpckhqdq,
        Punpckhwd,
        Punpcklbw,
        Punpckldq,
        Punpcklqdq,
        Punpcklwd,
        Push,
        Pxor,
        Rcpps,
        Rcpss,
        Ror,
        Roundpd,
        Roundps,
        Roundsd,
        Roundss,
        Rsqrtps,
        Rsqrtss,
        Sar,
        Setcc,
        Sha256Msg1,
        Sha256Msg2,
        Sha256Rnds2,
        Shl,
        Shr,
        Shufpd,
        Shufps,
        Sqrtpd,
        Sqrtps,
        Sqrtsd,
        Sqrtss,
        Stmxcsr,
        Sub,
        Subpd,
        Subps,
        Subsd,
        Subss,
        Test,
        Unpckhpd,
        Unpckhps,
        Unpcklpd,
        Unpcklps,
        Vblendvpd,
        Vblendvps,
        Vcvtph2ps,
        Vcvtps2ph,
        Vfmadd231pd,
        Vfmadd231ps,
        Vfmadd231sd,
        Vfmadd231ss,
        Vfmsub231sd,
        Vfmsub231ss,
        Vfnmadd231pd,
        Vfnmadd231ps,
        Vfnmadd231sd,
        Vfnmadd231ss,
        Vfnmsub231sd,
        Vfnmsub231ss,
        Vpblendvb,
        Vpternlogd,
        Xor,
        Xorpd,
        Xorps,

        // Not an instruction: number of members declared above.
        Count
    }
}
// Origin (from the enclosing diff): new file src/ARMeilleure/CodeGen/X86/X86Optimizer.cs, blob 98a19b9a, 259 lines.
using ARMeilleure.CodeGen.Optimizations;
using ARMeilleure.IntermediateRepresentation;
using ARMeilleure.Translation;
using System.Collections.Generic;
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// x86 specific IR pass: moves constants that cannot be used as immediates of an addition into
    /// locals, and folds address arithmetic feeding loads and stores into memory operands.
    /// </summary>
    static class X86Optimizer
    {
        // Upper bound on how many uses a single hoisted constant local may accumulate.
        private const int MaxConstantUses = 10000;

        /// <summary>
        /// Runs the pass over every block of <paramref name="cfg"/>, then removes the nodes that
        /// ended up unused.
        /// </summary>
        /// <param name="cfg">Control flow graph to optimize in place.</param>
        public static void RunPass(ControlFlowGraph cfg)
        {
            // Constant value -> local currently holding it. Only valid within a single block.
            var hoistedConstants = new Dictionary<ulong, Operand>();

            Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
            {
                // A cached local is reused unless it already has many uses; in that case a new
                // constant mov is forced, in order to avoid overflow of the counts field
                // (that is limited to 16 bits).
                if (hoistedConstants.TryGetValue(source.Value, out Operand cached) && cached.UsesCount <= MaxConstantUses)
                {
                    return cached;
                }

                Operand fresh = Local(source.Type);

                block.Operations.AddBefore(operation, Operation(Instruction.Copy, fresh, source));

                hoistedConstants[source.Value] = fresh;

                return fresh;
            }

            bool MustBeHoisted(Operand operand)
            {
                return operand.Kind == OperandKind.Constant && (operand.Relocatable || CodeGenCommon.IsLongConst(operand));
            }

            for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
            {
                hoistedConstants.Clear();

                Operation node = block.Operations.First;

                while (node != default)
                {
                    // Taken up front: the successor is picked before this node is touched.
                    Operation following = node.ListNext;

                    // Insert copies for constants that can't fit on a 32-bits immediate.
                    // Doing this early unblocks a few optimizations.
                    if (node.Instruction == Instruction.Add)
                    {
                        Operand lhs = node.GetSource(0);
                        Operand rhs = node.GetSource(1);

                        if (MustBeHoisted(lhs))
                        {
                            node.SetSource(0, GetConstantCopy(block, node, lhs));
                        }

                        if (MustBeHoisted(rhs))
                        {
                            node.SetSource(1, GetConstantCopy(block, node, rhs));
                        }
                    }

                    // Try to fold something like:
                    //  shl rbx, 2
                    //  add rax, rbx
                    //  add rax, 0xcafe
                    //  mov rax, [rax]
                    // Into:
                    //  mov rax, [rax+rbx*4+0xcafe]
                    if (IsMemoryLoadOrStore(node.Instruction))
                    {
                        // Loads have a destination that gives the access type;
                        // stores don't, so the stored value (source 1) gives it.
                        OperandType accessType = node.Destination != default
                            ? node.Destination.Type
                            : node.GetSource(1).Type;

                        Operand folded = GetMemoryOperandOrNull(node.GetSource(0), accessType);

                        if (folded != default)
                        {
                            node.SetSource(0, folded);
                        }
                    }

                    node = following;
                }
            }

            Optimizer.RemoveUnusedNodes(cfg);
        }

        /// <summary>
        /// Tries to express <paramref name="addr"/> as a base + index * scale + displacement memory operand.
        /// </summary>
        /// <param name="addr">Address operand of the load or store.</param>
        /// <param name="type">Type of the value being accessed.</param>
        /// <returns>The memory operand, or <c>default</c> when nothing could be folded.</returns>
        private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
        {
            Operand baseOp = addr;

            // Step 1: if the address is "local X + 32-bits immediate", X becomes the base and
            // the immediate becomes the displacement. Otherwise the base stays the address.
            int displacement = GetConstOp(ref baseOp);

            // Step 2: if the base is "local Y + local Z", Y becomes the base and Z the index.
            // When one of them is a left shift of local W by a value >= 0 and <= 3, W becomes
            // the index and the shift amount becomes the scale.
            // There is one missed case, which is the address being a shift result, but this is
            // probably not worth optimizing as it should never happen.
            (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp);

            // If the base is still the address, then there's nothing that can be optimized.
            if (baseOp == addr)
            {
                return default;
            }

            // Last chance for a displacement: an unscaled index that is itself "local + immediate".
            if (displacement == 0 && scale == Multiplier.x1 && indexOp != default)
            {
                displacement = GetConstOp(ref indexOp);
            }

            return MemoryOp(type, baseOp, indexOp, scale, displacement);
        }

        /// <summary>
        /// If <paramref name="baseOp"/> is the result of adding a constant that is not a long constant
        /// to a local, replaces it with that local and returns the constant.
        /// </summary>
        /// <param name="baseOp">Operand to inspect; updated only on success.</param>
        /// <returns>The constant as a 32-bit integer, or 0 when the pattern does not match.</returns>
        private static int GetConstOp(ref Operand baseOp)
        {
            Operation add = GetAsgOpWithInst(baseOp, Instruction.Add);

            if (add == default)
            {
                return 0;
            }

            Operand lhs = add.GetSource(0);
            Operand rhs = add.GetSource(1);

            bool constantFirst = lhs.Kind == OperandKind.Constant && rhs.Kind == OperandKind.LocalVariable;
            bool constantLast = lhs.Kind == OperandKind.LocalVariable && rhs.Kind == OperandKind.Constant;

            if (!constantFirst && !constantLast)
            {
                return 0;
            }

            Operand constant = constantFirst ? lhs : rhs;
            Operand variable = constantFirst ? rhs : lhs;

            // If we have addition by 64-bits constant, then we can't optimize it further,
            // as we can't encode a 64-bits immediate on the memory operand.
            if (CodeGenCommon.IsLongConst(constant))
            {
                return 0;
            }

            baseOp = variable;

            return constant.AsInt32();
        }

        /// <summary>
        /// If <paramref name="baseOp"/> is the sum of two locals, splits it into base and index,
        /// turning a left shift by up to 3 on either side into the index scale.
        /// </summary>
        /// <param name="baseOp">Operand to inspect; replaced by the new base on success.</param>
        /// <returns>The index operand (<c>default</c> if none) and its scale.</returns>
        private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp)
        {
            Operand indexOp = default;

            Multiplier scale = Multiplier.x1;

            Operation sum = GetAsgOpWithInst(baseOp, Instruction.Add);

            if (sum == default)
            {
                return (indexOp, scale);
            }

            Operand lhs = sum.GetSource(0);
            Operand rhs = sum.GetSource(1);

            if (lhs.Kind != OperandKind.LocalVariable || rhs.Kind != OperandKind.LocalVariable)
            {
                return (indexOp, scale);
            }

            // Default split, used when no usable shift is found.
            baseOp = lhs;
            indexOp = rhs;

            // Look for the shift on the left operand first, then on the right one.
            Operation shl = GetAsgOpWithInst(lhs, Instruction.ShiftLeft);

            bool shiftedIsRhs = shl == default;

            if (shiftedIsRhs)
            {
                shl = GetAsgOpWithInst(rhs, Instruction.ShiftLeft);
            }

            if (shl == default)
            {
                return (indexOp, scale);
            }

            Operand shifted = shl.GetSource(0);
            Operand amount = shl.GetSource(1);

            if (shifted.Kind == OperandKind.LocalVariable && amount.Kind == OperandKind.Constant && amount.Value <= 3)
            {
                switch (amount.Value)
                {
                    case 1:
                        scale = Multiplier.x2;
                        break;
                    case 2:
                        scale = Multiplier.x4;
                        break;
                    case 3:
                        scale = Multiplier.x8;
                        break;
                    default:
                        scale = Multiplier.x1;
                        break;
                }

                // The operand that was not the shift is the base.
                baseOp = shiftedIsRhs ? lhs : rhs;
                indexOp = shifted;
            }

            return (indexOp, scale);
        }

        /// <summary>
        /// Returns the operation that assigns <paramref name="op"/> when it is assigned exactly once
        /// and by instruction <paramref name="inst"/>; <c>default</c> otherwise.
        /// </summary>
        private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
        {
            // If we have multiple assignments, folding is not safe
            // as the value may be different depending on the
            // control flow path.
            if (op.AssignmentsCount != 1)
            {
                return default;
            }

            Operation only = op.Assignments[0];

            return only.Instruction == inst ? only : default;
        }

        /// <summary>
        /// Tells whether <paramref name="inst"/> is one of the 8, 16 or full width loads and stores.
        /// </summary>
        private static bool IsMemoryLoadOrStore(Instruction inst)
        {
            switch (inst)
            {
                case Instruction.Load:
                case Instruction.Load16:
                case Instruction.Load8:
                case Instruction.Store:
                case Instruction.Store16:
                case Instruction.Store8:
                    return true;

                default:
                    return false;
            }
        }
    }
}

// Origin (from the enclosing diff): new file src/ARMeilleure/CodeGen/X86/X86Register.cs, blob 01f63e31, 41 lines.
namespace ARMeilleure.CodeGen.X86
{
    /// <summary>
    /// Register numbers for the sixteen general purpose and the sixteen XMM registers.
    /// </summary>
    /// <remarks>
    /// Both groups are numbered 0 to 15, so a value on its own does not tell which group it
    /// belongs to (for example <see cref="Rax"/> and <see cref="Xmm0"/> are both 0).
    /// </remarks>
    enum X86Register
    {
        Invalid = -1,

        // General purpose registers.
        Rax = 0,
        Rcx = 1,
        Rdx = 2,
        Rbx = 3,
        Rsp = 4,
        Rbp = 5,
        Rsi = 6,
        Rdi = 7,
        R8 = 8,
        R9 = 9,
        R10 = 10,
        R11 = 11,
        R12 = 12,
        R13 = 13,
        R14 = 14,
        R15 = 15,

        // XMM registers; same numbering range as above.
        Xmm0 = 0,
        Xmm1 = 1,
        Xmm2 = 2,
        Xmm3 = 3,
        Xmm4 = 4,
        Xmm5 = 5,
        Xmm6 = 6,
        Xmm7 = 7,
        Xmm8 = 8,
        Xmm9 = 9,
        Xmm10 = 10,
        Xmm11 = 11,
        Xmm12 = 12,
        Xmm13 = 13,
        Xmm14 = 14,
        Xmm15 = 15
    }
}
\ No newline at end of file |
