Diffstat (limited to 'src/ARMeilleure/CodeGen')
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs | 270
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs | 47
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs | 14
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs | 11
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/Assembler.cs | 1160
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs | 96
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs | 91
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs | 287
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs | 1580
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs | 662
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs | 185
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs | 14
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs | 463
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs | 59
-rw-r--r--  src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs | 892
-rw-r--r--  src/ARMeilleure/CodeGen/CompiledFunction.cs | 68
-rw-r--r--  src/ARMeilleure/CodeGen/Linking/RelocEntry.cs | 38
-rw-r--r--  src/ARMeilleure/CodeGen/Linking/RelocInfo.cs | 32
-rw-r--r--  src/ARMeilleure/CodeGen/Linking/Symbol.cs | 99
-rw-r--r--  src/ARMeilleure/CodeGen/Linking/SymbolType.cs | 28
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs | 72
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs | 346
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs | 252
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/Simplification.cs | 183
-rw-r--r--  src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs | 83
-rw-r--r--  src/ARMeilleure/CodeGen/PreAllocatorCommon.cs | 57
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs | 19
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs | 259
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs | 454
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs | 12
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs | 1101
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs | 396
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs | 40
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs | 74
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs | 50
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs | 25
-rw-r--r--  src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs | 84
-rw-r--r--  src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs | 16
-rw-r--r--  src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs | 11
-rw-r--r--  src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs | 20
-rw-r--r--  src/ARMeilleure/CodeGen/X86/Assembler.cs | 1559
-rw-r--r--  src/ARMeilleure/CodeGen/X86/AssemblerTable.cs | 295
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CallConvName.cs | 8
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CallingConvention.cs | 158
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs | 19
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CodeGenContext.cs | 105
-rw-r--r--  src/ARMeilleure/CodeGen/X86/CodeGenerator.cs | 1865
-rw-r--r--  src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs | 144
-rw-r--r--  src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs | 14
-rw-r--r--  src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs | 200
-rw-r--r--  src/ARMeilleure/CodeGen/X86/IntrinsicType.cs | 18
-rw-r--r--  src/ARMeilleure/CodeGen/X86/Mxcsr.cs | 15
-rw-r--r--  src/ARMeilleure/CodeGen/X86/PreAllocator.cs | 796
-rw-r--r--  src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs | 334
-rw-r--r--  src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs | 327
-rw-r--r--  src/ARMeilleure/CodeGen/X86/X86Condition.cs | 47
-rw-r--r--  src/ARMeilleure/CodeGen/X86/X86Instruction.cs | 231
-rw-r--r--  src/ARMeilleure/CodeGen/X86/X86Optimizer.cs | 259
-rw-r--r--  src/ARMeilleure/CodeGen/X86/X86Register.cs | 41
59 files changed, 16085 insertions, 0 deletions
diff --git a/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs b/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
new file mode 100644
index 00000000..fdd4d024
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs
@@ -0,0 +1,270 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class Arm64Optimizer
+ {
+ private const int MaxConstantUses = 10000;
+
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ var constants = new Dictionary<ulong, Operand>();
+
+ Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
+ {
+ // If the constant already has many uses, we also force a new constant mov to be added,
+ // to avoid overflowing the use count field (which is limited to 16 bits).
+ if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
+ {
+ constant = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, constant, source);
+
+ block.Operations.AddBefore(operation, copyOp);
+
+ constants[source.Value] = constant;
+ }
+
+ return constant;
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ constants.Clear();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ // Insert copies for constants that can't fit in a 32-bit immediate.
+ // Doing this early unblocks a few optimizations.
+ if (node.Instruction == Instruction.Add)
+ {
+ Operand src1 = node.GetSource(0);
+ Operand src2 = node.GetSource(1);
+
+ if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32)))
+ {
+ node.SetSource(0, GetConstantCopy(block, node, src1));
+ }
+
+ if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32)))
+ {
+ node.SetSource(1, GetConstantCopy(block, node, src2));
+ }
+ }
+
+ // Try to fold something like:
+ // lsl x1, x1, #2
+ // add x0, x0, x1
+ // ldr x0, [x0]
+ // add x2, x2, #16
+ // ldr x2, [x2]
+ // Into:
+ // ldr x0, [x0, x1, lsl #2]
+ // ldr x2, [x2, #16]
+ if (IsMemoryLoadOrStore(node.Instruction))
+ {
+ OperandType type;
+
+ if (node.Destination != default)
+ {
+ type = node.Destination.Type;
+ }
+ else
+ {
+ type = node.GetSource(1).Type;
+ }
+
+ Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
+
+ if (memOp != default)
+ {
+ node.SetSource(0, memOp);
+ }
+ }
+ }
+ }
+
+ Optimizer.RemoveUnusedNodes(cfg);
+ }
+
+ private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
+ {
+ Operand baseOp = addr;
+
+ // First we check if the address is the result of a local X plus an immediate. If that is
+ // the case, then baseOp becomes X, and the addition immediate becomes the memory operand
+ // immediate. Otherwise, baseOp remains the address itself.
+ int imm = GetConstOp(ref baseOp, type);
+ if (imm != 0)
+ {
+ return MemoryOp(type, baseOp, default, Multiplier.x1, imm);
+ }
+
+ // Now we check if baseOp is the result of adding two locals Y and Z. If that is the case,
+ // we set baseOp to Y and indexOp to Z. We further check if Z is the result of a left shift
+ // of a local W by an amount equal to 0 or Log2(AccessSize); if so, we set indexOp to W and
+ // adjust the scale of the memory operand to match that of the left shift.
+ // One case is missed: the address itself being a shift result. That is probably not worth
+ // optimizing, as it should never happen.
+ (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type);
+
+ // If baseOp is still equal to address, then there's nothing that can be optimized.
+ if (baseOp == addr)
+ {
+ return default;
+ }
+
+ return MemoryOp(type, baseOp, indexOp, scale, 0);
+ }
+
+ private static int GetConstOp(ref Operand baseOp, OperandType accessType)
+ {
+ Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (operation == default)
+ {
+ return 0;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ Operand constOp;
+ Operand otherOp;
+
+ if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
+ {
+ constOp = src1;
+ otherOp = src2;
+ }
+ else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
+ {
+ constOp = src2;
+ otherOp = src1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ // If we have addition by a constant that we can't encode on the instruction,
+ // then we can't optimize it further.
+ if (ConstTooLong(constOp, accessType))
+ {
+ return 0;
+ }
+
+ baseOp = otherOp;
+
+ return constOp.AsInt32();
+ }
+
+ private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType)
+ {
+ Operand indexOp = default;
+
+ Multiplier scale = Multiplier.x1;
+
+ Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (addOp == default)
+ {
+ return (indexOp, scale);
+ }
+
+ Operand src1 = addOp.GetSource(0);
+ Operand src2 = addOp.GetSource(1);
+
+ if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
+ {
+ return (indexOp, scale);
+ }
+
+ baseOp = src1;
+ indexOp = src2;
+
+ Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
+
+ bool indexOnSrc2 = false;
+
+ if (shlOp == default)
+ {
+ shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
+
+ indexOnSrc2 = true;
+ }
+
+ if (shlOp != default)
+ {
+ Operand shSrc = shlOp.GetSource(0);
+ Operand shift = shlOp.GetSource(1);
+
+ int maxShift = Assembler.GetScaleForType(accessType);
+
+ if (shSrc.Kind == OperandKind.LocalVariable &&
+ shift.Kind == OperandKind.Constant &&
+ (shift.Value == 0 || shift.Value == (ulong)maxShift))
+ {
+ scale = shift.Value switch
+ {
+ 1 => Multiplier.x2,
+ 2 => Multiplier.x4,
+ 3 => Multiplier.x8,
+ 4 => Multiplier.x16,
+ _ => Multiplier.x1
+ };
+
+ baseOp = indexOnSrc2 ? src1 : src2;
+ indexOp = shSrc;
+ }
+ }
+
+ return (indexOp, scale);
+ }
+
+ private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
+ {
+ // If we have multiple assignments, folding is not safe
+ // as the value may be different depending on the
+ // control flow path.
+ if (op.AssignmentsCount != 1)
+ {
+ return default;
+ }
+
+ Operation asgOp = op.Assignments[0];
+
+ if (asgOp.Instruction != inst)
+ {
+ return default;
+ }
+
+ return asgOp;
+ }
+
+ private static bool IsMemoryLoadOrStore(Instruction inst)
+ {
+ return inst == Instruction.Load || inst == Instruction.Store;
+ }
+
+ private static bool ConstTooLong(Operand constOp, OperandType accessType)
+ {
+ if ((uint)constOp.Value != constOp.Value)
+ {
+ return true;
+ }
+
+ return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType);
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs b/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
new file mode 100644
index 00000000..db27a810
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum ArmCondition
+ {
+ Eq = 0,
+ Ne = 1,
+ GeUn = 2,
+ LtUn = 3,
+ Mi = 4,
+ Pl = 5,
+ Vs = 6,
+ Vc = 7,
+ GtUn = 8,
+ LeUn = 9,
+ Ge = 10,
+ Lt = 11,
+ Gt = 12,
+ Le = 13,
+ Al = 14,
+ Nv = 15
+ }
+
+ static class ComparisonArm64Extensions
+ {
+ public static ArmCondition ToArmCondition(this Comparison comp)
+ {
+ return comp switch
+ {
+ Comparison.Equal => ArmCondition.Eq,
+ Comparison.NotEqual => ArmCondition.Ne,
+ Comparison.Greater => ArmCondition.Gt,
+ Comparison.LessOrEqual => ArmCondition.Le,
+ Comparison.GreaterUI => ArmCondition.GtUn,
+ Comparison.LessOrEqualUI => ArmCondition.LeUn,
+ Comparison.GreaterOrEqual => ArmCondition.Ge,
+ Comparison.Less => ArmCondition.Lt,
+ Comparison.GreaterOrEqualUI => ArmCondition.GeUn,
+ Comparison.LessUI => ArmCondition.LtUn,
+
+ _ => throw new ArgumentException(null, nameof(comp))
+ };
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs b/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
new file mode 100644
index 00000000..062a6d0b
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum ArmExtensionType
+ {
+ Uxtb = 0,
+ Uxth = 1,
+ Uxtw = 2,
+ Uxtx = 3,
+ Sxtb = 4,
+ Sxth = 5,
+ Sxtw = 6,
+ Sxtx = 7
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs b/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
new file mode 100644
index 00000000..d223a146
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs
@@ -0,0 +1,11 @@
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum ArmShiftType
+ {
+ Lsl = 0,
+ Lsr = 1,
+ Asr = 2,
+ Ror = 3
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/Assembler.cs b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs
new file mode 100644
index 00000000..0ec0be7c
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs
@@ -0,0 +1,1160 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+using System.IO;
+using static ARMeilleure.IntermediateRepresentation.Operand;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ class Assembler
+ {
+ public const uint SfFlag = 1u << 31;
+
+ private const int SpRegister = 31;
+ private const int ZrRegister = 31;
+
+ private readonly Stream _stream;
+
+ public Assembler(Stream stream)
+ {
+ _stream = stream;
+ }
+
+ public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount);
+ }
+
+ public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false)
+ {
+ WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm);
+ }
+
+ public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Asr(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
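+ // ASR rd, rn, #shift is an alias of SBFM rd, rn, #shift, #(datasize - 1).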
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Sbfm(rd, rn, shift, mask);
+ }
+ else
+ {
+ Asrv(rd, rn, rm);
+ }
+ }
+
+ public void Asrv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm);
+ }
+
+ public void B(int imm)
+ {
+ WriteUInt32(0x14000000u | EncodeSImm26_2(imm));
+ }
+
+ public void B(ArmCondition condition, int imm)
+ {
+ WriteUInt32(0x54000000u | (uint)condition | (EncodeSImm19_2(imm) << 5));
+ }
+
+ public void Blr(Operand rn)
+ {
+ WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Br(Operand rn)
+ {
+ WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Brk()
+ {
+ WriteUInt32(0xd4200000u);
+ }
+
+ public void Cbz(Operand rt, int imm)
+ {
+ WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt);
+ }
+
+ public void Cbnz(Operand rt, int imm)
+ {
+ WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt);
+ }
+
+ public void Clrex(int crm = 15)
+ {
+ WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8));
+ }
+
+ public void Clz(Operand rd, Operand rn)
+ {
+ WriteInstructionAuto(0x5ac01000u, rd, rn);
+ }
+
+ public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true)
+ {
+ Debug.Assert((uint)size < 4);
+ WriteSimdInstruction(0x2e208c00u | ((uint)size << 22), rd, rn, rm, q);
+ }
+
+ public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Subs(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+ {
+ WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm);
+ }
+
+ public void Cset(Operand rd, ArmCondition condition)
+ {
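+ // CSET rd, cond is an alias of CSINC rd, zr, zr with the condition inverted.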
+ var zr = Factory.Register(ZrRegister, RegisterType.Integer, rd.Type);
+ Csinc(rd, zr, zr, (ArmCondition)((int)condition ^ 1));
+ }
+
+ public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition)
+ {
+ WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm);
+ }
+
+ public void Dmb(uint option)
+ {
+ WriteUInt32(0xd50330bfu | (option << 8));
+ }
+
+ public void DupScalar(Operand rd, Operand rn, int index, int size)
+ {
+ WriteInstruction(0x5e000400u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
+ }
+
+ public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q);
+ }
+
+ public void Extr(Operand rd, Operand rn, Operand rm, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionBitwiseAuto(0x13800000u | n | (EncodeUImm6(imms) << 10), rd, rn, rm);
+ }
+
+ public void FaddScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e202800u, rd, rn, rm);
+ }
+
+ public void FcvtScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e224000u | (rd.Type == OperandType.FP64 ? 1u << 15 : 1u << 22);
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void FdivScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e201800u, rd, rn, rm);
+ }
+
+ public void Fmov(Operand rd, Operand rn)
+ {
+ WriteFPInstructionAuto(0x1e204000u, rd, rn);
+ }
+
+ public void Fmov(Operand rd, Operand rn, bool topHalf)
+ {
+ Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger());
+ Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf);
+
+ uint opcode = rd.Type.IsInteger() ? 0b110u : 0b111u;
+
+ uint rmode = topHalf ? 1u << 19 : 0u;
+ uint ftype = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64 ? 1u << 22 : 0u;
+ uint sf = rd.Type == OperandType.I64 || rn.Type == OperandType.I64 ? SfFlag : 0u;
+
+ WriteUInt32(0x1e260000u | (opcode << 16) | rmode | ftype | sf | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void FmulScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e200800u, rd, rn, rm);
+ }
+
+ public void FnegScalar(Operand rd, Operand rn)
+ {
+ WriteFPInstructionAuto(0x1e214000u, rd, rn);
+ }
+
+ public void FsubScalar(Operand rd, Operand rn, Operand rm)
+ {
+ WriteFPInstructionAuto(0x1e203800u, rd, rn, rm);
+ }
+
+ public void Ins(Operand rd, Operand rn, int index, int size)
+ {
+ WriteInstruction(0x4e001c00u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn);
+ }
+
+ public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size)
+ {
+ uint imm4 = (uint)srcIndex << size;
+ Debug.Assert((uint)srcIndex < (16u >> size));
+ WriteInstruction(0x6e000400u | (imm4 << 11) | (EncodeIndexSizeImm5(dstIndex, size) << 16), rd, rn);
+ }
+
+ public void Ldaxp(Operand rt, Operand rt2, Operand rn)
+ {
+ WriteInstruction(0x887f8000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rt2);
+ }
+
+ public void Ldaxr(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn);
+ }
+
+ public void Ldaxrb(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u, rt, rn);
+ }
+
+ public void Ldaxrh(Operand rt, Operand rn)
+ {
+ WriteInstruction(0x085ffc00u | (1u << 30), rt, rn);
+ }
+
+ public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void Ldr(Operand rt, Operand rn)
+ {
+ if (rn.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = rn.GetMemory();
+
+ if (memOp.Index != default)
+ {
+ Debug.Assert(memOp.Displacement == 0);
+ Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
+ LdrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
+ }
+ else
+ {
+ LdrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
+ }
+ }
+ else
+ {
+ LdrRiUn(rt, rn, 0);
+ }
+ }
+
+ public void LdrLit(Operand rt, int offset)
+ {
+ uint instruction = 0x18000000u | (EncodeSImm19_2(offset) << 5);
+
+ if (rt.Type == OperandType.I64)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstruction(instruction, rt);
+ }
+
+ public void LdrRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type);
+ WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
+ }
+
+ public void LdrbRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38400400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrbRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38400c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrbRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x39400000u | (EncodeUImm12(imm, 0) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78400400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78400c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void LdrhRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x79400000u | (EncodeUImm12(imm, 1) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Ldur(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Lsl(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
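+ // LSL rd, rn, #shift is an alias of UBFM rd, rn, #(-shift MOD datasize), #(datasize - 1 - shift).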
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Ubfm(rd, rn, -shift & mask, mask - shift);
+ }
+ else
+ {
+ Lslv(rd, rn, rm);
+ }
+ }
+
+ public void Lslv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm);
+ }
+
+ public void Lsr(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
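+ // LSR rd, rn, #shift is an alias of UBFM rd, rn, #shift, #(datasize - 1).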
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Ubfm(rd, rn, shift, mask);
+ }
+ else
+ {
+ Lsrv(rd, rn, rm);
+ }
+ }
+
+ public void Lsrv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm);
+ }
+
+ public void Madd(Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra);
+ }
+
+ public void Mul(Operand rd, Operand rn, Operand rm)
+ {
+ Madd(rd, rn, rm, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type));
+ }
+
+ public void Mov(Operand rd, Operand rn)
+ {
+ if (rd.Type.IsInteger())
+ {
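+ // MOV (register) is an alias of ORR rd, zr, rn.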
+ Orr(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn);
+ }
+ else
+ {
+ OrrVector(rd, rn, rn);
+ }
+ }
+
+ public void MovSp(Operand rd, Operand rn)
+ {
+ if (rd.GetRegister().Index == SpRegister ||
+ rn.GetRegister().Index == SpRegister)
+ {
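+ // ORR cannot encode SP, so moves involving SP use ADD (immediate) with a zero immediate instead.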
+ Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true);
+ }
+ else
+ {
+ Mov(rd, rn);
+ }
+ }
+
+ public void Mov(Operand rd, int imm)
+ {
+ Movz(rd, imm, 0);
+ }
+
+ public void Movz(Operand rd, int imm, int hw)
+ {
+ Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+ WriteInstructionAuto(0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
+ }
+
+ public void Movk(Operand rd, int imm, int hw)
+ {
+ Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw);
+ WriteInstructionAuto(0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd);
+ }
+
+ public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2)
+ {
+ uint instruction = 0xd5300000u;
+
+ instruction |= (op2 & 7) << 5;
+ instruction |= (crm & 15) << 8;
+ instruction |= (crn & 15) << 12;
+ instruction |= (op1 & 7) << 16;
+ instruction |= (o0 & 1) << 19;
+
+ WriteInstruction(instruction, rt);
+ }
+
+ public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Orn(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
+ }
+
+ public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Sub(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount);
+ }
+
+ public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q);
+ }
+
+ public void Ret(Operand rn)
+ {
+ WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5));
+ }
+
+ public void Rev(Operand rd, Operand rn)
+ {
+ uint opc0 = rd.Type == OperandType.I64 ? 1u << 10 : 0u;
+ WriteInstructionAuto(0x5ac00800u | opc0, rd, rn);
+ }
+
+ public void Ror(Operand rd, Operand rn, Operand rm)
+ {
+ if (rm.Kind == OperandKind.Constant)
+ {
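+ // ROR rd, rn, #shift is an alias of EXTR rd, rn, rn, #shift.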
+ int shift = rm.AsInt32();
+ int mask = rd.Type == OperandType.I64 ? 63 : 31;
+ shift &= mask;
+ Extr(rd, rn, rn, shift);
+ }
+ else
+ {
+ Rorv(rd, rn, rm);
+ }
+ }
+
+ public void Rorv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm);
+ }
+
+ public void Sbfm(Operand rd, Operand rn, int immr, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionAuto(0x13000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
+ }
+
+ public void ScvtfScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e220000u;
+
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteFPInstructionAuto(instruction, rd, rn);
+ }
+
+ public void Sdiv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm);
+ }
+
+ public void Smulh(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16(0x9b407c00u, rd, rn, rm);
+ }
+
+ public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs)
+ {
+ WriteInstruction(0x88208000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs, rt2);
+ }
+
+ public void Stlxr(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs);
+ }
+
+ public void Stlxrb(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u, rt, rn, rs);
+ }
+
+ public void Stlxrh(Operand rt, Operand rn, Operand rs)
+ {
+ WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs);
+ }
+
+ public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm)
+ {
+ uint instruction = GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type);
+ WriteInstruction(instruction, rt, rn, rt2);
+ }
+
+ public void Str(Operand rt, Operand rn)
+ {
+ if (rn.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = rn.GetMemory();
+
+ if (memOp.Index != default)
+ {
+ Debug.Assert(memOp.Displacement == 0);
+ Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type));
+ StrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1);
+ }
+ else
+ {
+ StrRiUn(rt, memOp.BaseAddress, memOp.Displacement);
+ }
+ }
+ else
+ {
+ StrRiUn(rt, rn, 0);
+ }
+ }
+
+ public void StrRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type);
+ WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift);
+ }
+
+ public void StrbRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38000400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrbRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x38000c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrbRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x39000000u | (EncodeUImm12(imm, 0) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiPost(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78000400u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiPre(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x78000c00u | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void StrhRiUn(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = 0x79000000u | (EncodeUImm12(imm, 1) << 10);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Stur(Operand rt, Operand rn, int imm)
+ {
+ uint instruction = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type) | (EncodeSImm9(imm) << 12);
+ WriteInstruction(instruction, rt, rn);
+ }
+
+ public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount);
+ }
+
+ public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Sxtb(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 7);
+ }
+
+ public void Sxth(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 15);
+ }
+
+ public void Sxtw(Operand rd, Operand rn)
+ {
+ Sbfm(rd, rn, 0, 31);
+ }
+
+ public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0)
+ {
+ Ands(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount);
+ }
+
+ public void Ubfm(Operand rd, Operand rn, int immr, int imms)
+ {
+ uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u;
+ WriteInstructionAuto(0x53000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn);
+ }
+
+ public void UcvtfScalar(Operand rd, Operand rn)
+ {
+ uint instruction = 0x1e230000u;
+
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteFPInstructionAuto(instruction, rd, rn);
+ }
+
+ public void Udiv(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm);
+ }
+
+ public void Umov(Operand rd, Operand rn, int index, int size)
+ {
+ uint q = size == 3 ? 1u << 30 : 0u;
+ WriteInstruction(0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16) | q, rd, rn);
+ }
+
+ public void Umulh(Operand rd, Operand rn, Operand rm)
+ {
+ WriteInstructionRm16(0x9bc07c00u, rd, rn, rm);
+ }
+
+ public void Uxtb(Operand rd, Operand rn)
+ {
+ Ubfm(rd, rn, 0, 7);
+ }
+
+ public void Uxth(Operand rd, Operand rn)
+ {
+ Ubfm(rd, rn, 0, 15);
+ }
+
+ private void WriteInstructionAuto(
+ uint instI,
+ uint instR,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0,
+ bool immForm = false)
+ {
+ if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm))
+ {
+ Debug.Assert(shiftAmount == 0);
+ int imm = rm.AsInt32();
+ Debug.Assert((uint)imm == rm.Value);
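+ // ADD/SUB (immediate) takes a 12-bit unsigned immediate, optionally shifted left by 12 (the sh flag).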
+ if (imm != 0 && (imm & 0xfff) == 0)
+ {
+ instI |= 1 << 22; // sh flag
+ imm >>= 12;
+ }
+ WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn);
+ }
+ else
+ {
+ instR |= EncodeUImm6(shiftAmount) << 10;
+ instR |= (uint)shiftType << 22;
+
+ WriteInstructionRm16Auto(instR, rd, rn, rm);
+ }
+ }
+
+ private void WriteInstructionAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmExtensionType extensionType,
+ int shiftAmount = 0)
+ {
+ Debug.Assert((uint)shiftAmount <= 4);
+
+ instruction |= (uint)shiftAmount << 10;
+ instruction |= (uint)extensionType << 13;
+
+ WriteInstructionRm16Auto(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionBitwiseAuto(
+ uint instI,
+ uint instR,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0)
+ {
+ if (rm.Kind == OperandKind.Constant && rm.Value != 0)
+ {
+ Debug.Assert(shiftAmount == 0);
+ bool canEncode = CodeGenCommon.TryEncodeBitMask(rm, out int immN, out int immS, out int immR);
+ Debug.Assert(canEncode);
+ uint instruction = instI | ((uint)immS << 10) | ((uint)immR << 16) | ((uint)immN << 22);
+
+ WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount);
+ }
+ }
+
+ private void WriteInstructionBitwiseAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmShiftType shiftType = ArmShiftType.Lsl,
+ int shiftAmount = 0)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ instruction |= EncodeUImm6(shiftAmount) << 10;
+ instruction |= (uint)shiftType << 22;
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionLdrStrAuto(
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ ArmExtensionType extensionType,
+ bool shift)
+ {
+ if (shift)
+ {
+ instruction |= 1u << 12;
+ }
+
+ instruction |= (uint)extensionType << 13;
+
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionAuto(uint instruction, Operand rd)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd);
+ }
+
+ public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd, rn);
+ }
+
+ private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstruction(instruction, rd, rn, rm, ra);
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd)
+ {
+ WriteUInt32(instruction | EncodeReg(rd));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 10));
+ }
+
+ public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(ra) << 10) | (EncodeReg(rm) << 16));
+ }
+
+ private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn)
+ {
+ if (rd.Type == OperandType.FP64)
+ {
+ instruction |= 1u << 22;
+ }
+
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5));
+ }
+
+ private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ if (rd.Type == OperandType.FP64)
+ {
+ instruction |= 1u << 22;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true)
+ {
+ if (q)
+ {
+ instruction |= 1u << 30;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ if (rd.Type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ }
+
+ WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+ }
+
+ public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm)
+ {
+ WriteUInt32(instruction | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16));
+ }
+
+ private static uint GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type)
+ {
+ uint instruction;
+ int scale;
+
+ if (type.IsInteger())
+ {
+ instruction = intInst;
+
+ if (type == OperandType.I64)
+ {
+ instruction |= SfFlag;
+ scale = 3;
+ }
+ else
+ {
+ scale = 2;
+ }
+ }
+ else
+ {
+ int opc = type switch
+ {
+ OperandType.FP32 => 0,
+ OperandType.FP64 => 1,
+ _ => 2
+ };
+
+ instruction = vecInst | ((uint)opc << 30);
+ scale = 2 + opc;
+ }
+
+ instruction |= (EncodeSImm7(imm, scale) << 15);
+
+ return instruction;
+ }
+
+ private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type)
+ {
+ uint instruction;
+
+ if (type.IsInteger())
+ {
+ instruction = intInst;
+
+ if (type == OperandType.I64)
+ {
+ instruction |= 1 << 30;
+ }
+ }
+ else
+ {
+ instruction = vecInst;
+
+ if (type == OperandType.V128)
+ {
+ instruction |= 1u << 23;
+ }
+ else
+ {
+ instruction |= type == OperandType.FP32 ? 2u << 30 : 3u << 30;
+ }
+ }
+
+ return instruction;
+ }
+
+ private static uint EncodeIndexSizeImm5(int index, int size)
+ {
+ Debug.Assert((uint)size < 4);
+ Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination.");
+ return ((uint)index << (size + 1)) | (1u << size);
+ }
+
+ private static uint EncodeSImm7(int value, int scale)
+ {
+ uint imm = (uint)(value >> scale) & 0x7f;
+ Debug.Assert(((int)imm << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+ return imm;
+ }
+
+ private static uint EncodeSImm9(int value)
+ {
+ uint imm = (uint)value & 0x1ff;
+ Debug.Assert(((int)imm << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeSImm19_2(int value)
+ {
+ uint imm = (uint)(value >> 2) & 0x7ffff;
+ Debug.Assert(((int)imm << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeSImm26_2(int value)
+ {
+ uint imm = (uint)(value >> 2) & 0x3ffffff;
+ Debug.Assert(((int)imm << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm4(int value)
+ {
+ uint imm = (uint)value & 0xf;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm6(int value)
+ {
+ uint imm = (uint)value & 0x3f;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm12(int value, OperandType type)
+ {
+ return EncodeUImm12(value, GetScaleForType(type));
+ }
+
+ private static uint EncodeUImm12(int value, int scale)
+ {
+ uint imm = (uint)(value >> scale) & 0xfff;
+ Debug.Assert((int)imm << scale == value, $"Failed to encode constant 0x{value:X} with scale {scale}.");
+ return imm;
+ }
+
+ private static uint EncodeUImm16(int value)
+ {
+ uint imm = (uint)value & 0xffff;
+ Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}.");
+ return imm;
+ }
+
+ private static uint EncodeReg(Operand reg)
+ {
+ if (reg.Kind == OperandKind.Constant && reg.Value == 0)
+ {
+ return ZrRegister;
+ }
+
+ uint regIndex = (uint)reg.GetRegister().Index;
+ Debug.Assert(reg.Kind == OperandKind.Register);
+ Debug.Assert(regIndex < 32);
+ return regIndex;
+ }
+
+ public static int GetScaleForType(OperandType type)
+ {
+ return type switch
+ {
+ OperandType.I32 => 2,
+ OperandType.I64 => 3,
+ OperandType.FP32 => 2,
+ OperandType.FP64 => 3,
+ OperandType.V128 => 4,
+ _ => throw new ArgumentException($"Invalid type {type}.")
+ };
+ }
+
+ private void WriteInt16(short value)
+ {
+ WriteUInt16((ushort)value);
+ }
+
+ private void WriteInt32(int value)
+ {
+ WriteUInt32((uint)value);
+ }
+
+ private void WriteByte(byte value)
+ {
+ _stream.WriteByte(value);
+ }
+
+ private void WriteUInt16(ushort value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ }
+
+ private void WriteUInt32(uint value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs b/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
new file mode 100644
index 00000000..fda8d786
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs
@@ -0,0 +1,96 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CallingConvention
+ {
+ private const int RegistersMask = unchecked((int)0xffffffff);
+
+ // Some of these registers have specific roles and can't be used as general purpose registers.
+ // X18 - Reserved for platform specific usage.
+ // X29 - Frame pointer.
+ // X30 - Return address.
+ // X31 - Not an actual register, in some cases maps to SP, and in others to ZR.
+ private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31);
+
+ public static int GetIntAvailableRegisters()
+ {
+ return RegistersMask & ~ReservedRegsMask;
+ }
+
+ public static int GetVecAvailableRegisters()
+ {
+ return RegistersMask;
+ }
+
+ public static int GetIntCallerSavedRegisters()
+ {
+ return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask;
+ }
+
+ public static int GetFpCallerSavedRegisters()
+ {
+ return GetFpCalleeSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetVecCallerSavedRegisters()
+ {
+ return GetVecCalleeSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetIntCalleeSavedRegisters()
+ {
+ return 0x1ff80000; // X19 to X28
+ }
+
+ public static int GetFpCalleeSavedRegisters()
+ {
+ return 0xff00; // D8 to D15
+ }
+
+ public static int GetVecCalleeSavedRegisters()
+ {
+ return 0;
+ }
+
+ public static int GetArgumentsOnRegsCount()
+ {
+ return 8;
+ }
+
+ public static int GetIntArgumentRegister(int index)
+ {
+ if ((uint)index < (uint)GetArgumentsOnRegsCount())
+ {
+ return index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static int GetVecArgumentRegister(int index)
+ {
+ if ((uint)index < (uint)GetArgumentsOnRegsCount())
+ {
+ return index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static int GetIntReturnRegister()
+ {
+ return 0;
+ }
+
+ public static int GetIntReturnRegisterHigh()
+ {
+ return 1;
+ }
+
+ public static int GetVecReturnRegister()
+ {
+ return 0;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
new file mode 100644
index 00000000..8d1e597b
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs
@@ -0,0 +1,91 @@
+using ARMeilleure.IntermediateRepresentation;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGenCommon
+ {
+ public const int TcAddressRegister = 8;
+ public const int ReservedRegister = 17;
+
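+ // A constant fits in an immediate field iff shifting the encoded bits back (with sign or zero
+ // extension) reproduces the original value, which is what the round trips below test.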
+ public static bool ConstFitsOnSImm7(int value, int scale)
+ {
+ return (((value >> scale) << 25) >> (25 - scale)) == value;
+ }
+
+ public static bool ConstFitsOnSImm9(int value)
+ {
+ return ((value << 23) >> 23) == value;
+ }
+
+ public static bool ConstFitsOnUImm12(int value)
+ {
+ return (value & 0xfff) == value;
+ }
+
+ public static bool ConstFitsOnUImm12(int value, OperandType type)
+ {
+ int scale = Assembler.GetScaleForType(type);
+ return (((value >> scale) & 0xfff) << scale) == value;
+ }
+
+ public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR)
+ {
+ return TryEncodeBitMask(operand.Type, operand.Value, out immN, out immS, out immR);
+ }
+
+ public static bool TryEncodeBitMask(OperandType type, ulong value, out int immN, out int immS, out int immR)
+ {
+ if (type == OperandType.I32)
+ {
+ value |= value << 32;
+ }
+
+ return TryEncodeBitMask(value, out immN, out immS, out immR);
+ }
+
+ public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR)
+ {
+ // Some special values also can't be encoded:
+ // 0 can't be encoded because we need to subtract 1 from the ones count (which would become negative for 0).
+ // A value with all bits set can't be encoded because it is reserved according to the spec:
+ // Any value AND all ones equals the value itself, so it's effectively a no-op.
+ // Any value OR all ones equals all ones, so one can just use MOV.
+ // Any value XOR all ones equals its inverse, so one can just use MVN.
+ if (value == 0 || value == ulong.MaxValue)
+ {
+ immN = 0;
+ immS = 0;
+ immR = 0;
+
+ return false;
+ }
+
+ // Normalize the value by rotating it such that the LSB is 1: this ensures we get a complete
+ // element that has not been cut in half across the word boundary.
+ int rotation = BitOperations.TrailingZeroCount(value & (value + 1));
+ ulong rotatedValue = ulong.RotateRight(value, rotation);
+
+ // Now that we have a complete element at the bottom with its LSB = 1, determine the element
+ // size and the number of ones in the element.
+ int elementSize = BitOperations.TrailingZeroCount(rotatedValue & (rotatedValue + 1));
+ int onesInElement = BitOperations.TrailingZeroCount(~rotatedValue);
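+
+ // For example, value = 0xFF00FF00FF00FF00 rotates to 0x00FF00FF00FF00FF (rotation = 8),
+ // giving elementSize = 16 and onesInElement = 8, which encodes below as
+ // immN = 0, immS = 0b100111 and immR = 8.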
+
+ // Check the value is repeating; also ensures element size is a power of two.
+ if (ulong.RotateRight(value, elementSize) != value)
+ {
+ immN = 0;
+ immS = 0;
+ immR = 0;
+
+ return false;
+ }
+
+ immN = (elementSize >> 6) & 1;
+ immS = (((~elementSize + 1) << 1) | (onesInElement - 1)) & 0x3f;
+ immR = (elementSize - rotation) & (elementSize - 1);
+
+ return true;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
new file mode 100644
index 00000000..0dd5355f
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs
@@ -0,0 +1,287 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System;
+using System.Collections.Generic;
+using System.IO;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ class CodeGenContext
+ {
+ private const int BccInstLength = 4;
+ private const int CbnzInstLength = 4;
+ private const int LdrLitInstLength = 4;
+
+ private Stream _stream;
+
+ public int StreamOffset => (int)_stream.Length;
+
+ public AllocationResult AllocResult { get; }
+
+ public Assembler Assembler { get; }
+
+ public BasicBlock CurrBlock { get; private set; }
+
+ public bool HasCall { get; }
+
+ public int CallArgsRegionSize { get; }
+ public int FpLrSaveRegionSize { get; }
+
+ private readonly Dictionary<BasicBlock, long> _visitedBlocks;
+ private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches;
+
+ private struct ConstantPoolEntry
+ {
+ public readonly int Offset;
+ public readonly Symbol Symbol;
+ public readonly List<(Operand, int)> LdrOffsets;
+
+ public ConstantPoolEntry(int offset, Symbol symbol)
+ {
+ Offset = offset;
+ Symbol = symbol;
+ LdrOffsets = new List<(Operand, int)>();
+ }
+ }
+
+ private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool;
+
+ private bool _constantPoolWritten;
+ private long _constantPoolOffset;
+
+ private ArmCondition _jNearCondition;
+ private Operand _jNearValue;
+
+ private long _jNearPosition;
+
+ private readonly bool _relocatable;
+
+ public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
+ {
+ _stream = MemoryStreamManager.Shared.GetStream();
+
+ AllocResult = allocResult;
+
+ Assembler = new Assembler(_stream);
+
+ bool hasCall = maxCallArgs >= 0;
+
+ HasCall = hasCall;
+
+ if (maxCallArgs < 0)
+ {
+ maxCallArgs = 0;
+ }
+
+ CallArgsRegionSize = maxCallArgs * 16;
+ FpLrSaveRegionSize = hasCall ? 16 : 0;
+
+ _visitedBlocks = new Dictionary<BasicBlock, long>();
+ _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>();
+ _constantPool = new Dictionary<ulong, ConstantPoolEntry>();
+
+ _relocatable = relocatable;
+ }
+
+ public void EnterBlock(BasicBlock block)
+ {
+ CurrBlock = block;
+
+ long target = _stream.Position;
+
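+ // Patch any forward branches that were recorded while this block's address was still unknown.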
+ if (_pendingBranches.TryGetValue(block, out var list))
+ {
+ foreach (var tuple in list)
+ {
+ _stream.Seek(tuple.BranchPos, SeekOrigin.Begin);
+ WriteBranch(tuple.Condition, target);
+ }
+
+ _stream.Seek(target, SeekOrigin.Begin);
+ _pendingBranches.Remove(block);
+ }
+
+ _visitedBlocks.Add(block, target);
+ }
+
+ public void JumpTo(BasicBlock target)
+ {
+ JumpTo(ArmCondition.Al, target);
+ }
+
+ public void JumpTo(ArmCondition condition, BasicBlock target)
+ {
+ if (_visitedBlocks.TryGetValue(target, out long offset))
+ {
+ WriteBranch(condition, offset);
+ }
+ else
+ {
+ if (!_pendingBranches.TryGetValue(target, out var list))
+ {
+ list = new List<(ArmCondition, long)>();
+ _pendingBranches.Add(target, list);
+ }
+
+ list.Add((condition, _stream.Position));
+
+ _stream.Seek(BccInstLength, SeekOrigin.Current);
+ }
+ }
+
+ private void WriteBranch(ArmCondition condition, long to)
+ {
+ int imm = checked((int)(to - _stream.Position));
+
+ if (condition != ArmCondition.Al)
+ {
+ Assembler.B(condition, imm);
+ }
+ else
+ {
+ Assembler.B(imm);
+ }
+ }
+
+ public void JumpToNear(ArmCondition condition)
+ {
+ _jNearCondition = condition;
+ _jNearPosition = _stream.Position;
+
+ _stream.Seek(BccInstLength, SeekOrigin.Current);
+ }
+
+ public void JumpToNearIfNotZero(Operand value)
+ {
+ _jNearValue = value;
+ _jNearPosition = _stream.Position;
+
+ _stream.Seek(CbnzInstLength, SeekOrigin.Current);
+ }
+
+ public void JumpHere()
+ {
+ long currentPosition = _stream.Position;
+ long offset = currentPosition - _jNearPosition;
+
+ _stream.Seek(_jNearPosition, SeekOrigin.Begin);
+
+ if (_jNearValue != default)
+ {
+ Assembler.Cbnz(_jNearValue, checked((int)offset));
+ _jNearValue = default;
+ }
+ else
+ {
+ Assembler.B(_jNearCondition, checked((int)offset));
+ }
+
+ _stream.Seek(currentPosition, SeekOrigin.Begin);
+ }
+
+ public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value)
+ {
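+ // Record the LDR (literal) site and skip past it; the instruction is patched once the
+ // constant pool position is known.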
+ if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe))
+ {
+ cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol);
+ _constantPool.Add(value, cpe);
+ }
+
+ cpe.LdrOffsets.Add((rt, (int)_stream.Position));
+ _stream.Seek(LdrLitInstLength, SeekOrigin.Current);
+ }
+
+ private long WriteConstantPool()
+ {
+ if (_constantPoolWritten)
+ {
+ return _constantPoolOffset;
+ }
+
+ long constantPoolBaseOffset = _stream.Position;
+
+ foreach (ulong value in _constantPool.Keys)
+ {
+ WriteUInt64(value);
+ }
+
+ foreach (ConstantPoolEntry cpe in _constantPool.Values)
+ {
+ foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets)
+ {
+ _stream.Seek(ldrOffset, SeekOrigin.Begin);
+
+ int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+ int pcRelativeOffset = absoluteOffset - ldrOffset;
+
+ Assembler.LdrLit(rt, pcRelativeOffset);
+ }
+ }
+
+ _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin);
+
+ _constantPoolOffset = constantPoolBaseOffset;
+ _constantPoolWritten = true;
+
+ return constantPoolBaseOffset;
+ }
+
+ public (byte[], RelocInfo) GetCode()
+ {
+ long constantPoolBaseOffset = WriteConstantPool();
+
+ byte[] code = new byte[_stream.Length];
+
+ long originalPosition = _stream.Position;
+
+ _stream.Seek(0, SeekOrigin.Begin);
+ _stream.Read(code, 0, code.Length);
+ _stream.Seek(originalPosition, SeekOrigin.Begin);
+
+ RelocInfo relocInfo;
+
+ if (_relocatable)
+ {
+ RelocEntry[] relocs = new RelocEntry[_constantPool.Count];
+
+ int index = 0;
+
+ foreach (ConstantPoolEntry cpe in _constantPool.Values)
+ {
+ if (cpe.Symbol.Type != SymbolType.None)
+ {
+ int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset));
+ relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol);
+ }
+ }
+
+ if (index != relocs.Length)
+ {
+ Array.Resize(ref relocs, index);
+ }
+
+ relocInfo = new RelocInfo(relocs);
+ }
+ else
+ {
+ relocInfo = new RelocInfo(Array.Empty<RelocEntry>());
+ }
+
+ return (code, relocInfo);
+ }
+
+ private void WriteUInt64(ulong value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ _stream.WriteByte((byte)(value >> 32));
+ _stream.WriteByte((byte)(value >> 40));
+ _stream.WriteByte((byte)(value >> 48));
+ _stream.WriteByte((byte)(value >> 56));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
new file mode 100644
index 00000000..fc4fa976
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs
@@ -0,0 +1,1580 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+
+using static ARMeilleure.IntermediateRepresentation.Operand;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGenerator
+ {
+ private const int DWordScale = 3;
+
+ private const int RegistersCount = 32;
+
+ private const int FpRegister = 29;
+ private const int LrRegister = 30;
+ private const int SpRegister = 31;
+ private const int ZrRegister = 31;
+
+ private enum AccessSize
+ {
+ Byte,
+ Hword,
+ Auto
+ }
+
+ private static Action<CodeGenContext, Operation>[] _instTable;
+
+ static CodeGenerator()
+ {
+ _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+ Add(Instruction.Add, GenerateAdd);
+ Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+ Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+ Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+ Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+ Add(Instruction.BranchIf, GenerateBranchIf);
+ Add(Instruction.ByteSwap, GenerateByteSwap);
+ Add(Instruction.Call, GenerateCall);
+ //Add(Instruction.Clobber, GenerateClobber);
+ Add(Instruction.Compare, GenerateCompare);
+ Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
+ Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
+ Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
+ Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+ Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+ Add(Instruction.ConvertToFP, GenerateConvertToFP);
+ Add(Instruction.ConvertToFPUI, GenerateConvertToFPUI);
+ Add(Instruction.Copy, GenerateCopy);
+ Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+ Add(Instruction.Divide, GenerateDivide);
+ Add(Instruction.DivideUI, GenerateDivideUI);
+ Add(Instruction.Fill, GenerateFill);
+ Add(Instruction.Load, GenerateLoad);
+ Add(Instruction.Load16, GenerateLoad16);
+ Add(Instruction.Load8, GenerateLoad8);
+ Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+ Add(Instruction.Multiply, GenerateMultiply);
+ Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+ Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+ Add(Instruction.Negate, GenerateNegate);
+ Add(Instruction.Return, GenerateReturn);
+ Add(Instruction.RotateRight, GenerateRotateRight);
+ Add(Instruction.ShiftLeft, GenerateShiftLeft);
+ Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+ Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+ Add(Instruction.SignExtend16, GenerateSignExtend16);
+ Add(Instruction.SignExtend32, GenerateSignExtend32);
+ Add(Instruction.SignExtend8, GenerateSignExtend8);
+ Add(Instruction.Spill, GenerateSpill);
+ Add(Instruction.SpillArg, GenerateSpillArg);
+ Add(Instruction.StackAlloc, GenerateStackAlloc);
+ Add(Instruction.Store, GenerateStore);
+ Add(Instruction.Store16, GenerateStore16);
+ Add(Instruction.Store8, GenerateStore8);
+ Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.Tailcall, GenerateTailcall);
+ Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+ Add(Instruction.VectorExtract, GenerateVectorExtract);
+ Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+ Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+ Add(Instruction.VectorInsert, GenerateVectorInsert);
+ Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+ Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+ Add(Instruction.VectorOne, GenerateVectorOne);
+ Add(Instruction.VectorZero, GenerateVectorZero);
+ Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+ Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+ Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+ Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+ Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+
+ static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+ {
+ _instTable[(int)inst] = func;
+ }
+ }
+
+ public static CompiledFunction Generate(CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ Logger.StartPass(PassName.Optimization);
+
+ if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+ {
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Optimizer.RunPass(cfg);
+ }
+
+ BlockPlacement.RunPass(cfg);
+ }
+
+ Arm64Optimizer.RunPass(cfg);
+
+ Logger.EndPass(PassName.Optimization, cfg);
+
+ Logger.StartPass(PassName.PreAllocation);
+
+ StackAllocator stackAlloc = new();
+
+ PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+ Logger.EndPass(PassName.PreAllocation, cfg);
+
+ Logger.StartPass(PassName.RegisterAllocation);
+
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Ssa.Deconstruct(cfg);
+ }
+
+ IRegisterAllocator regAlloc;
+
+ if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+ {
+ regAlloc = new LinearScanAllocator();
+ }
+ else
+ {
+ regAlloc = new HybridAllocator();
+ }
+
+ RegisterMasks regMasks = new(
+ CallingConvention.GetIntAvailableRegisters(),
+ CallingConvention.GetVecAvailableRegisters(),
+ CallingConvention.GetIntCallerSavedRegisters(),
+ CallingConvention.GetVecCallerSavedRegisters(),
+ CallingConvention.GetIntCalleeSavedRegisters(),
+ CallingConvention.GetVecCalleeSavedRegisters(),
+ RegistersCount);
+
+ AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+ Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+ Logger.StartPass(PassName.CodeGeneration);
+
+ //Console.Error.WriteLine(IRDumper.GetDump(cfg));
+
+ bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+
+ CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
+
+ UnwindInfo unwindInfo = WritePrologue(context);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ context.EnterBlock(block);
+
+ for (Operation node = block.Operations.First; node != default;)
+ {
+ node = GenerateOperation(context, node);
+ }
+
+ if (block.SuccessorsCount == 0)
+ {
+ // The only blocks which can have 0 successors are exit blocks.
+ Operation last = block.Operations.Last;
+
+ Debug.Assert(last.Instruction == Instruction.Tailcall ||
+ last.Instruction == Instruction.Return);
+ }
+ else
+ {
+ BasicBlock succ = block.GetSuccessor(0);
+
+ if (succ != block.ListNext)
+ {
+ context.JumpTo(succ);
+ }
+ }
+ }
+
+ (byte[] code, RelocInfo relocInfo) = context.GetCode();
+
+ Logger.EndPass(PassName.CodeGeneration);
+
+ return new CompiledFunction(code, unwindInfo, relocInfo);
+ }
+
+ private static Operation GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ if (operation.Instruction == Instruction.Extended)
+ {
+ CodeGeneratorIntrinsic.GenerateOperation(context, operation);
+ }
+ else
+ {
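+                // Adjacent loads or stores to consecutive addresses can be fused into a single LDP/STP.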
+ if (IsLoadOrStore(operation) &&
+ operation.ListNext != default &&
+ operation.ListNext.Instruction == operation.Instruction &&
+ TryPairMemoryOp(context, operation, operation.ListNext))
+ {
+ // Skip next operation if we managed to pair them.
+ return operation.ListNext.ListNext;
+ }
+
+ Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction];
+
+ if (func != null)
+ {
+ func(context, operation);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+ }
+ }
+
+ return operation.ListNext;
+ }
+
+ private static void GenerateAdd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ // ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Add(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FaddScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.And(dest, src1, src2);
+ }
+
+ private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Eor(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.EorVector(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Mvn(dest, source);
+ }
+
+ private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Orr(dest, src1, src2);
+ }
+
+ private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToArmCondition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.JumpTo(cond, context.CurrBlock.GetSuccessor(1));
+ }
+
+ private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Rev(dest, source);
+ }
+
+ private static void GenerateCall(CodeGenContext context, Operation operation)
+ {
+ context.Assembler.Blr(operation.GetSource(0));
+ }
+
+ private static void GenerateCompare(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(dest.Type == OperandType.I32);
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToArmCondition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.Assembler.Cset(dest, cond);
+ }
+
+ private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
+ {
+ if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+ {
+ Operand actualLow = operation.GetDestination(0);
+ Operand actualHigh = operation.GetDestination(1);
+ Operand temp0 = operation.GetDestination(2);
+ Operand temp1 = operation.GetDestination(3);
+ Operand address = operation.GetSource(0);
+ Operand expectedLow = operation.GetSource(1);
+ Operand expectedHigh = operation.GetSource(2);
+ Operand desiredLow = operation.GetSource(3);
+ Operand desiredHigh = operation.GetSource(4);
+
+ GenerateAtomicDcas(
+ context,
+ address,
+ expectedLow,
+ expectedHigh,
+ desiredLow,
+ desiredHigh,
+ actualLow,
+ actualHigh,
+ temp0,
+ temp1);
+ }
+ else
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto);
+ }
+ }
+
+ private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Hword);
+ }
+
+ private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+ {
+ Operand actual = operation.GetDestination(0);
+ Operand result = operation.GetDestination(1);
+ Operand address = operation.GetSource(0);
+ Operand expected = operation.GetSource(1);
+ Operand desired = operation.GetSource(2);
+
+ GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Byte);
+ }
+
+ private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(src1, src2);
+
+ Debug.Assert(src1.Type.IsInteger());
+
+ context.Assembler.Cmp(src1, src2);
+ }
+
+ private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src2, src3);
+
+ Debug.Assert(dest.Type.IsInteger());
+ Debug.Assert(src1.Type == OperandType.I32);
+
+ context.Assembler.Cmp (src1, Const(src1.Type, 0));
+ context.Assembler.Csel(dest, src2, src3, ArmCondition.Ne);
+ }
+
+ private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64);
+
+ context.Assembler.Mov(dest, Register(source, OperandType.I32));
+ }
+
+ private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+ Debug.Assert(dest.Type != source.Type);
+ Debug.Assert(source.Type != OperandType.V128);
+
+ if (source.Type.IsInteger())
+ {
+ context.Assembler.ScvtfScalar(dest, source);
+ }
+ else
+ {
+ context.Assembler.FcvtScalar(dest, source);
+ }
+ }
+
+ private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+ Debug.Assert(dest.Type != source.Type);
+ Debug.Assert(source.Type.IsInteger());
+
+ context.Assembler.UcvtfScalar(dest, source);
+ }
+
+ private static void GenerateCopy(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
+
+ // Moves to the same register are useless.
+ if (dest.Kind == source.Kind && dest.Value == source.Value)
+ {
+ return;
+ }
+
+ if (dest.Kind == OperandKind.Register && source.Kind == OperandKind.Constant)
+ {
+ if (source.Relocatable)
+ {
+ context.ReserveRelocatableConstant(dest, source.Symbol, source.Value);
+ }
+ else
+ {
+ GenerateConstantCopy(context, dest, source.Value);
+ }
+ }
+ else
+ {
+ context.Assembler.Mov(dest, source);
+ }
+ }
+
+ private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Clz(dest, source);
+ }
+
+ private static void GenerateDivide(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ ValidateBinOp(dest, dividend, divisor);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sdiv(dest, dividend, divisor);
+ }
+ else
+ {
+ context.Assembler.FdivScalar(dest, dividend, divisor);
+ }
+ }
+
+ private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ ValidateBinOp(dest, dividend, divisor);
+
+ context.Assembler.Udiv(dest, dividend, divisor);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ context.Assembler.Ldr(value, address);
+ }
+
+ private static void GenerateLoad16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.LdrhRiUn(value, address, 0);
+ }
+
+ private static void GenerateLoad8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.LdrbRiUn(value, address, 0);
+ }
+
+ private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation)
+ {
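+            // 0xf selects the full-system barrier (DMB SY).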
+ context.Assembler.Dmb(0xf);
+ }
+
+ private static void GenerateMultiply(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Mul(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FmulScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ Debug.Assert(dest.Type == OperandType.I64);
+
+ context.Assembler.Smulh(dest, src1, src2);
+ }
+
+ private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1, src2);
+
+ Debug.Assert(dest.Type == OperandType.I64);
+
+ context.Assembler.Umulh(dest, src1, src2);
+ }
+
+ private static void GenerateNegate(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Neg(dest, source);
+ }
+ else
+ {
+ context.Assembler.FnegScalar(dest, source);
+ }
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+ {
+ context.Assembler.LdrRiUn(value, address, offset);
+ }
+ else if (CodeGenCommon.ConstFitsOnSImm9(offset))
+ {
+ context.Assembler.Ldur(value, address, offset);
+ }
+ else
+ {
+ Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempAddress, (ulong)offset);
+ context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
+ context.Assembler.LdrRiUn(value, tempAddress, 0);
+ }
+ }
+
+ private static void GenerateReturn(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Ret(Register(LrRegister));
+ }
+
+ private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Ror(dest, src1, src2);
+ }
+
+ private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Lsl(dest, src1, src2);
+ }
+
+ private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Asr(dest, src1, src2);
+ }
+
+ private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Lsr(dest, src1, src2);
+ }
+
+ private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxth(dest, source);
+ }
+
+ private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxtw(dest, source);
+ }
+
+ private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Sxtb(dest, source);
+ }
+
+ private static void GenerateFill(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+
+ GenerateLoad(context, dest, Register(SpRegister), offs);
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type))
+ {
+ context.Assembler.StrRiUn(value, address, offset);
+ }
+ else if (CodeGenCommon.ConstFitsOnSImm9(offset))
+ {
+ context.Assembler.Stur(value, address, offset);
+ }
+ else
+ {
+ Operand tempAddress = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempAddress, (ulong)offset);
+ context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input.
+ context.Assembler.StrRiUn(value, tempAddress, 0);
+ }
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, context.CallArgsRegionSize + context.FpLrSaveRegionSize);
+ }
+
+ private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, 0);
+ }
+
+ private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize;
+
+ context.Assembler.Add(dest, Register(SpRegister), Const(dest.Type, offs));
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ context.Assembler.Str(value, address);
+ }
+
+ private static void GenerateStore16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.StrhRiUn(value, address, 0);
+ }
+
+ private static void GenerateStore8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = operation.GetSource(0);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.StrbRiUn(value, address, 0);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+ {
+ Operand offset = operation.GetSource(0);
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + baseOffset;
+
+ GenerateStore(context, source, Register(SpRegister), offs);
+ }
+
+ private static void GenerateSubtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ // ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sub(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.FsubScalar(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateTailcall(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Br(operation.GetSource(0));
+ }
+
+ private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ if (dest != default)
+ {
+ Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger());
+
+ OperandType destType = source.Type == OperandType.I64 ? OperandType.FP64 : OperandType.FP32;
+
+ context.Assembler.Fmov(Register(dest, destType), source, topHalf: false);
+ }
+ }
+
+ private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes());
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Umov(dest, src1, index, dest.Type == OperandType.I64 ? 3 : 2);
+ }
+ else
+ {
+ context.Assembler.DupScalar(dest, src1, index, dest.Type == OperandType.FP64 ? 3 : 2);
+ }
+ }
+
+ private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 8);
+
+ context.Assembler.Umov(dest, src1, index, 1);
+ }
+
+ private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 16);
+
+ context.Assembler.Umov(dest, src1, index, 0);
+ }
+
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ if (src2.Type.IsInteger())
+ {
+ context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2);
+ }
+ else
+ {
+ context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 3 : 2);
+ }
+ }
+
+ private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Ins(dest, src2, index, 1);
+ }
+
+ private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Ins(dest, src2, index, 0);
+ }
+
+ private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
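+            // Comparing a register with itself sets every element to all ones.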
+ context.Assembler.CmeqVector(dest, dest, dest, 2);
+ }
+
+ private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
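+            // EOR of a register with itself clears it without loading a constant.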
+ context.Assembler.EorVector(dest, dest, dest);
+ }
+
+ private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
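+            // A scalar FMOV writes the low 64 bits and zeroes the rest of the destination vector.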
+ context.Assembler.Fmov(Register(dest, OperandType.FP64), Register(source, OperandType.FP64));
+ }
+
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ context.Assembler.Fmov(Register(dest, OperandType.FP32), Register(source, OperandType.FP32));
+ }
+
+ private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Uxth(dest, source);
+ }
+
+ private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ // We can eliminate the move if source is already 32-bit and the registers are the same.
+ if (dest.Value == source.Value && source.Type == OperandType.I32)
+ {
+ return;
+ }
+
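+            // A 32-bit register write implicitly zeroes the upper 32 bits, so a W-form MOV performs the extension.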
+ context.Assembler.Mov(Register(dest.GetRegister().Index, OperandType.I32), source);
+ }
+
+ private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Uxtb(dest, source);
+ }
+
+ private static UnwindInfo WritePrologue(CodeGenContext context)
+ {
+            List<UnwindPushEntry> pushEntries = new();
+
+ Operand rsp = Register(SpRegister);
+
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+ int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+ int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8);
+
+ int offset = 0;
+
+ WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+ WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+
+ int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+ int outArgsSize = context.CallArgsRegionSize;
+
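+            // Frame layout, from low to high addresses: outgoing call arguments, saved FP/LR, the spill region, and the callee-saved register area on top.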
+ if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+ {
+ if (context.HasCall)
+ {
+ context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize);
+ context.Assembler.MovSp(Register(FpRegister), rsp);
+ }
+
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize));
+ }
+ }
+ else
+ {
+ int frameSize = localSize + outArgsSize;
+ if (frameSize != 0)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+ {
+ context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize));
+ }
+ else
+ {
+ Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+ context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+ }
+ }
+
+ context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize));
+ }
+ else
+ {
+ context.Assembler.MovSp(Register(FpRegister), Register(SpRegister));
+ }
+ }
+
+ return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+ }
+
+ private static void WritePrologueCalleeSavesPreIndexed(
+ CodeGenContext context,
+ List<UnwindPushEntry> pushEntries,
+ ref int mask,
+ ref int offset,
+ int calleeSaveRegionSize,
+ OperandType type)
+ {
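+            // The first store pre-indexes SP to allocate the whole callee-save region; later stores write at fixed offsets.
+            // An odd register count is handled with a single STR first so the remainder can be paired with STP.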
+ if ((BitOperations.PopCount((uint)mask) & 1) != 0)
+ {
+ int reg = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+ mask &= ~(1 << reg);
+
+ if (offset != 0)
+ {
+ context.Assembler.StrRiUn(Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.StrRiPre(Register(reg, type), Register(SpRegister), -calleeSaveRegionSize);
+ }
+
+ offset += type.GetSizeInBytes();
+ }
+
+ while (mask != 0)
+ {
+ int reg = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg));
+
+ mask &= ~(1 << reg);
+
+ int reg2 = BitOperations.TrailingZeroCount(mask);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg2));
+
+ mask &= ~(1 << reg2);
+
+ if (offset != 0)
+ {
+ context.Assembler.StpRiUn(Register(reg, type), Register(reg2, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.StpRiPre(Register(reg, type), Register(reg2, type), Register(SpRegister), -calleeSaveRegionSize);
+ }
+
+ offset += type.GetSizeInBytes() * 2;
+ }
+ }
+
+ private static void WriteEpilogue(CodeGenContext context)
+ {
+ Operand rsp = Register(SpRegister);
+
+ int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize);
+ int outArgsSize = context.CallArgsRegionSize;
+
+ if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale))
+ {
+ if (outArgsSize != 0)
+ {
+ context.Assembler.Add(rsp, rsp, Const(OperandType.I64, outArgsSize));
+ }
+
+ if (context.HasCall)
+ {
+ context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, localSize);
+ }
+ }
+ else
+ {
+ if (context.HasCall)
+ {
+ context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize);
+ }
+
+ int frameSize = localSize + outArgsSize;
+ if (frameSize != 0)
+ {
+ if (CodeGenCommon.ConstFitsOnUImm12(frameSize))
+ {
+ context.Assembler.Add(rsp, rsp, Const(OperandType.I64, frameSize));
+ }
+ else
+ {
+ Operand tempSize = Register(CodeGenCommon.ReservedRegister);
+ GenerateConstantCopy(context, tempSize, (ulong)frameSize);
+ context.Assembler.Add(rsp, rsp, tempSize, ArmExtensionType.Uxtx);
+ }
+ }
+ }
+
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask);
+ int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask);
+
+ int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8;
+ int calleeSaveRegionSize = Align16(offset);
+
+ WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64);
+ WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64);
+ }
+
+ private static void WriteEpilogueCalleeSavesPostIndexed(
+ CodeGenContext context,
+ ref int mask,
+ ref int offset,
+ int calleeSaveRegionSize,
+ OperandType type)
+ {
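+            // Restore in reverse order, pairing registers with LDP where possible; the access that reaches offset 0 post-indexes SP to pop the whole callee-save region.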
+ while (mask != 0)
+ {
+ int reg = BitUtils.HighestBitSet(mask);
+
+ mask &= ~(1 << reg);
+
+ if (mask != 0)
+ {
+ int reg2 = BitUtils.HighestBitSet(mask);
+
+ mask &= ~(1 << reg2);
+
+ offset -= type.GetSizeInBytes() * 2;
+
+ if (offset != 0)
+ {
+ context.Assembler.LdpRiUn(Register(reg2, type), Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.LdpRiPost(Register(reg2, type), Register(reg, type), Register(SpRegister), calleeSaveRegionSize);
+ }
+ }
+ else
+ {
+ offset -= type.GetSizeInBytes();
+
+ if (offset != 0)
+ {
+ context.Assembler.LdrRiUn(Register(reg, type), Register(SpRegister), offset);
+ }
+ else
+ {
+ context.Assembler.LdrRiPost(Register(reg, type), Register(SpRegister), calleeSaveRegionSize);
+ }
+ }
+ }
+ }
+
+ private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value)
+ {
+ if (value == 0)
+ {
+ context.Assembler.Mov(dest, Register(ZrRegister, dest.Type));
+ }
+ else if (CodeGenCommon.TryEncodeBitMask(dest.Type, value, out _, out _, out _))
+ {
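+                // The value is encodable as a logical immediate, so a single ORR with the zero register materializes it.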
+ context.Assembler.Orr(dest, Register(ZrRegister, dest.Type), Const(dest.Type, (long)value));
+ }
+ else
+ {
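+                // Materialize the value 16 bits at a time: MOVZ writes the first non-zero halfword and clears the rest; MOVK patches in each later one.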
+ int hw = 0;
+ bool first = true;
+
+ while (value != 0)
+ {
+ int valueLow = (ushort)value;
+ if (valueLow != 0)
+ {
+ if (first)
+ {
+ context.Assembler.Movz(dest, valueLow, hw);
+ first = false;
+ }
+ else
+ {
+ context.Assembler.Movk(dest, valueLow, hw);
+ }
+ }
+
+ hw++;
+ value >>= 16;
+ }
+ }
+ }
+
+ private static void GenerateAtomicCas(
+ CodeGenContext context,
+ Operand address,
+ Operand expected,
+ Operand desired,
+ Operand actual,
+ Operand result,
+ AccessSize accessSize)
+ {
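+            // Exclusive-monitor loop: LDAXR the current value, bail out on mismatch, then STLXR the desired value and retry until the store succeeds.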
+ int startOffset = context.StreamOffset;
+
+ switch (accessSize)
+ {
+ case AccessSize.Byte:
+ context.Assembler.Ldaxrb(actual, address);
+ break;
+ case AccessSize.Hword:
+ context.Assembler.Ldaxrh(actual, address);
+ break;
+ default:
+ context.Assembler.Ldaxr(actual, address);
+ break;
+ }
+
+ context.Assembler.Cmp(actual, expected);
+
+ context.JumpToNear(ArmCondition.Ne);
+
+ switch (accessSize)
+ {
+ case AccessSize.Byte:
+ context.Assembler.Stlxrb(desired, address, result);
+ break;
+ case AccessSize.Hword:
+ context.Assembler.Stlxrh(desired, address, result);
+ break;
+ default:
+ context.Assembler.Stlxr(desired, address, result);
+ break;
+ }
+
+ context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.
+
+ context.JumpHere();
+
+ context.Assembler.Clrex();
+ }
+
+ private static void GenerateAtomicDcas(
+ CodeGenContext context,
+ Operand address,
+ Operand expectedLow,
+ Operand expectedHigh,
+ Operand desiredLow,
+ Operand desiredHigh,
+ Operand actualLow,
+ Operand actualHigh,
+ Operand temp0,
+ Operand temp1)
+ {
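+            // 128-bit CAS via LDAXP/STLXP: XOR each half against its expected value and OR the results; any non-zero bit means the comparison failed.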
+ int startOffset = context.StreamOffset;
+
+ context.Assembler.Ldaxp(actualLow, actualHigh, address);
+ context.Assembler.Eor(temp0, actualHigh, expectedHigh);
+ context.Assembler.Eor(temp1, actualLow, expectedLow);
+ context.Assembler.Orr(temp0, temp1, temp0);
+
+ context.JumpToNearIfNotZero(temp0);
+
+ Operand result = Register(temp0, OperandType.I32);
+
+ context.Assembler.Stlxp(desiredLow, desiredHigh, address, result);
+ context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed.
+
+ context.JumpHere();
+
+ context.Assembler.Clrex();
+ }
+
+ private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp)
+ {
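+            // Pairing requires the same base register and value type, contiguous offsets, and a first offset that fits the scaled signed 7-bit LDP/STP immediate.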
+ if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset))
+ {
+ return false;
+ }
+
+ if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset))
+ {
+ return false;
+ }
+
+ if (op1Base != op2Base)
+ {
+ return false;
+ }
+
+ OperandType valueType = GetMemOpValueType(currentOp);
+
+ if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset)
+ {
+ return false;
+ }
+
+ if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2()))
+ {
+ return false;
+ }
+
+ if (currentOp.Instruction == Instruction.Load)
+ {
+ context.Assembler.LdpRiUn(currentOp.Destination, nextOp.Destination, op1Base, op1Offset);
+ }
+ else if (currentOp.Instruction == Instruction.Store)
+ {
+ context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset);
+ }
+ else
+ {
+ return false;
+ }
+
+ return true;
+ }
+
+ private static bool IsLoadOrStore(Operation operation)
+ {
+ return operation.Instruction == Instruction.Load || operation.Instruction == Instruction.Store;
+ }
+
+ private static OperandType GetMemOpValueType(Operation operation)
+ {
+ if (operation.Destination != default)
+ {
+ return operation.Destination.Type;
+ }
+
+ return operation.GetSource(1).Type;
+ }
+
+ private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset)
+ {
+ baseAddress = default;
+ offset = 0;
+ Operand address = operation.GetSource(0);
+
+ if (address.Kind != OperandKind.Memory)
+ {
+ return false;
+ }
+
+ MemoryOperand memOp = address.GetMemory();
+ Operand baseOp = memOp.BaseAddress;
+
+ if (baseOp == default)
+ {
+ baseOp = memOp.Index;
+
+ if (baseOp == default || memOp.Scale != Multiplier.x1)
+ {
+ return false;
+ }
+            }
+            else if (memOp.Index != default)
+            {
+                return false;
+            }
+
+            baseAddress = baseOp;
+ offset = memOp.Displacement;
+
+ return true;
+ }
+
+ private static Operand Register(Operand operand, OperandType type = OperandType.I64)
+ {
+ return Register(operand.GetRegister().Index, type);
+ }
+
+ private static Operand Register(int register, OperandType type = OperandType.I64)
+ {
+ return Factory.Register(register, RegisterType.Integer, type);
+ }
+
+ private static int Align16(int value)
+ {
+ return (value + 0xf) & ~0xf;
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateUnOp(Operand dest, Operand source)
+ {
+ // Destination and source aren't forced to be equals
+ // EnsureSameReg (dest, source);
+ EnsureSameType(dest, source);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+ {
+ // Destination and source aren't forced to be equals
+ // EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1, src2);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+ {
+ // Destination and source aren't forced to be equals
+ // EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+ }
+
+ private static void EnsureSameReg(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+ Debug.Assert(op1.Kind == op2.Kind);
+ Debug.Assert(op1.Value == op2.Value);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ Debug.Assert(op1.Type == op4.Type);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
new file mode 100644
index 00000000..aaa00bb6
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs
@@ -0,0 +1,662 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Diagnostics;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class CodeGeneratorIntrinsic
+ {
+ public static void GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ Intrinsic intrin = operation.Intrinsic;
+
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ switch (info.Type)
+ {
+ case IntrinsicType.ScalarUnary:
+ GenerateVectorUnary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarUnaryByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorUnaryByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarBinary:
+ GenerateVectorBinary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryFPByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryRd:
+ GenerateVectorUnary(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarBinaryShl:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarBinaryShr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPCompare:
+ GenerateScalarFPCompare(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.ScalarFPConvFixed:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPConvFixedGpr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateScalarFPConvGpr(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.ScalarFPConvGpr:
+ GenerateScalarFPConvGpr(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarTernary:
+ GenerateScalarTernary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2),
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.ScalarTernaryFPRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.ScalarTernaryShlRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+ case IntrinsicType.ScalarTernaryShrRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ 0,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+
+ case IntrinsicType.VectorUnary:
+ GenerateVectorUnary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.VectorUnaryByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorUnaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0));
+ break;
+ case IntrinsicType.VectorBinary:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryBitwise:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryFPByElem:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(2).AsInt32(),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryRd:
+ GenerateVectorUnary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1));
+ break;
+ case IntrinsicType.VectorBinaryShl:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorBinaryShr:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorFPConvFixed:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(0),
+ (uint)operation.GetSource(1).AsInt32());
+ break;
+ case IntrinsicType.VectorInsertByElem:
+ Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant);
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorInsertByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ (uint)operation.GetSource(1).AsInt32(),
+ operation.Destination,
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorLookupTable:
+ Debug.Assert((uint)(operation.SourcesCount - 2) <= 3);
+
+ for (int i = 1; i < operation.SourcesCount - 1; i++)
+ {
+ Register currReg = operation.GetSource(i).GetRegister();
+ Register prevReg = operation.GetSource(i - 1).GetRegister();
+
+ Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector);
+ }
+
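+                        // Bits 13-14 hold the TBL 'len' field: the number of table registers minus one.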
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst | ((uint)(operation.SourcesCount - 2) << 13),
+ operation.Destination,
+ operation.GetSource(0),
+ operation.GetSource(operation.SourcesCount - 1));
+ break;
+ case IntrinsicType.VectorTernaryFPRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryFPByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRd:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRdBitwise:
+ GenerateVectorBinary(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryRdByElem:
+ Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryByElem(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ (uint)operation.GetSource(3).AsInt32(),
+ operation.Destination,
+ operation.GetSource(1),
+ operation.GetSource(2));
+ break;
+ case IntrinsicType.VectorTernaryShlRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShlImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+ case IntrinsicType.VectorTernaryShrRd:
+ Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant);
+
+ GenerateVectorBinaryShrImm(
+ context,
+ (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift,
+ (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift,
+ info.Inst,
+ operation.Destination,
+ operation.GetSource(1),
+ (uint)operation.GetSource(2).AsInt32());
+ break;
+
+ case IntrinsicType.GetRegister:
+ context.Assembler.WriteInstruction(info.Inst, operation.Destination);
+ break;
+ case IntrinsicType.SetRegister:
+ context.Assembler.WriteInstruction(info.Inst, operation.GetSource(0));
+ break;
+
+ default:
+ throw new NotImplementedException(info.Type.ToString());
+ }
+ }
+
+ private static void GenerateScalarFPCompare(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand dest,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (sz << 22);
+
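+            // A constant zero comparand selects the compare-with-zero form; Rm is then ignored, so Rn is reused in its place.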
+ if (rm.Kind == OperandKind.Constant && rm.Value == 0)
+ {
+ instruction |= 0b1000;
+ rm = rn;
+ }
+
+ context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm);
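+            // The compare only sets flags; read NZCV (S3_3_C4_C2_0) back into the destination.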
+ context.Assembler.Mrs(dest, 1, 3, 4, 2, 0);
+ }
+
+ private static void GenerateScalarFPConvGpr(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn)
+ {
+ instruction |= (sz << 22);
+
+ if (rd.Type.IsInteger())
+ {
+ context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= Assembler.SfFlag;
+ }
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ }
+
+ private static void GenerateScalarFPConvGpr(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint fBits)
+ {
+ Debug.Assert(fBits <= 64);
+
+ instruction |= (sz << 22);
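+            // The scale field holds 64 - fBits, the number of fractional bits.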
+ instruction |= (64 - fBits) << 10;
+
+ if (rd.Type.IsInteger())
+ {
+ Debug.Assert(rd.Type != OperandType.I32 || fBits <= 32);
+
+ context.Assembler.WriteInstructionAuto(instruction, rd, rn);
+ }
+ else
+ {
+ if (rn.Type == OperandType.I64)
+ {
+ instruction |= Assembler.SfFlag;
+ }
+ else
+ {
+ Debug.Assert(fBits <= 32);
+ }
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+        }
+
+ private static void GenerateScalarTernary(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm,
+ Operand ra)
+ {
+ instruction |= (sz << 22);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra);
+ }
+
+ private static void GenerateVectorUnary(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorUnaryByElem(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn)
+ {
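+            // imm5 encodes the element size (lowest set bit) and, above it, the element index.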
+ uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz);
+
+ instruction |= (q << 30) | (imm5 << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorBinary(
+ CodeGenContext context,
+ uint q,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30);
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinary(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinaryByElem(
+ CodeGenContext context,
+ uint q,
+ uint size,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30) | (size << 22);
+
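+            // The element index is split across the H:L bits for 32-bit elements
+            // and the H:L:M bits for 16-bit elements.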
+ if (size == 2)
+ {
+ instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
+ }
+ else
+ {
+ instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9);
+ }
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinaryFPByElem(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ Operand rd,
+ Operand rn,
+ Operand rm)
+ {
+ instruction |= (q << 30) | (sz << 22);
+
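+            // Double-precision (sz != 0) indexes with the H bit only;
+            // single-precision uses the H:L bits.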
+ if (sz != 0)
+ {
+ instruction |= (srcIndex & 1) << 11;
+ }
+ else
+ {
+ instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10);
+ }
+
+ context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm);
+ }
+
+ private static void GenerateVectorBinaryShlImm(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint shift)
+ {
+ instruction |= (q << 30);
+
+            Debug.Assert(shift < (8u << (int)sz));
+
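+            // Left shifts encode immh:immb as esize + shift, where esize = 8 << sz.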
+ uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz)));
+
+ instruction |= (imm << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorBinaryShrImm(
+ CodeGenContext context,
+ uint q,
+ uint sz,
+ uint instruction,
+ Operand rd,
+ Operand rn,
+ uint shift)
+ {
+ instruction |= (q << 30);
+
+ Debug.Assert(shift > 0 && shift <= (8u << (int)sz));
+
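+            // Right shifts encode immh:immb as (2 * esize) - shift.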
+ uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift);
+
+ instruction |= (imm << 16);
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+
+ private static void GenerateVectorInsertByElem(
+ CodeGenContext context,
+ uint sz,
+ uint instruction,
+ uint srcIndex,
+ uint dstIndex,
+ Operand rd,
+ Operand rn)
+ {
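+            // INS (element): imm5 selects the destination element and size, imm4 the source element.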
+ uint imm4 = srcIndex << (int)sz;
+ uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz);
+
+ instruction |= imm4 << 11;
+ instruction |= imm5 << 16;
+
+ context.Assembler.WriteInstruction(instruction, rd, rn);
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs b/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
new file mode 100644
index 00000000..99ff299e
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs
@@ -0,0 +1,187 @@
+using System;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.Arm;
+using System.Runtime.Versioning;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static partial class HardwareCapabilities
+ {
+ static HardwareCapabilities()
+ {
+ if (!ArmBase.Arm64.IsSupported)
+ {
+ return;
+ }
+
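+            // Linux exposes CPU feature bits through the ELF auxiliary vector.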
+ if (OperatingSystem.IsLinux())
+ {
+ LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)getauxval(AT_HWCAP);
+ LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)getauxval(AT_HWCAP2);
+ }
+
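+            // macOS exposes each feature as an individual hw.optional sysctl.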
+ if (OperatingSystem.IsMacOS())
+ {
+ for (int i = 0; i < _sysctlNames.Length; i++)
+ {
+ if (CheckSysctlName(_sysctlNames[i]))
+ {
+ MacOsFeatureInfo |= (MacOsFeatureFlags)(1 << i);
+ }
+ }
+ }
+ }
+
+#region Linux
+
+ private const ulong AT_HWCAP = 16;
+ private const ulong AT_HWCAP2 = 26;
+
+ [LibraryImport("libc", SetLastError = true)]
+ private static partial ulong getauxval(ulong type);
+
+ [Flags]
+ public enum LinuxFeatureFlagsHwCap : ulong
+ {
+ Fp = 1 << 0,
+ Asimd = 1 << 1,
+ Evtstrm = 1 << 2,
+ Aes = 1 << 3,
+ Pmull = 1 << 4,
+ Sha1 = 1 << 5,
+ Sha2 = 1 << 6,
+ Crc32 = 1 << 7,
+ Atomics = 1 << 8,
+ FpHp = 1 << 9,
+ AsimdHp = 1 << 10,
+ CpuId = 1 << 11,
+ AsimdRdm = 1 << 12,
+ Jscvt = 1 << 13,
+ Fcma = 1 << 14,
+ Lrcpc = 1 << 15,
+ DcpOp = 1 << 16,
+ Sha3 = 1 << 17,
+ Sm3 = 1 << 18,
+ Sm4 = 1 << 19,
+ AsimdDp = 1 << 20,
+ Sha512 = 1 << 21,
+ Sve = 1 << 22,
+ AsimdFhm = 1 << 23,
+ Dit = 1 << 24,
+ Uscat = 1 << 25,
+ Ilrcpc = 1 << 26,
+ FlagM = 1 << 27,
+ Ssbs = 1 << 28,
+ Sb = 1 << 29,
+ Paca = 1 << 30,
+ Pacg = 1UL << 31
+ }
+
+ [Flags]
+ public enum LinuxFeatureFlagsHwCap2 : ulong
+ {
+ Dcpodp = 1 << 0,
+ Sve2 = 1 << 1,
+ SveAes = 1 << 2,
+ SvePmull = 1 << 3,
+ SveBitperm = 1 << 4,
+ SveSha3 = 1 << 5,
+ SveSm4 = 1 << 6,
+ FlagM2 = 1 << 7,
+ Frint = 1 << 8,
+ SveI8mm = 1 << 9,
+ SveF32mm = 1 << 10,
+ SveF64mm = 1 << 11,
+ SveBf16 = 1 << 12,
+ I8mm = 1 << 13,
+ Bf16 = 1 << 14,
+ Dgh = 1 << 15,
+ Rng = 1 << 16,
+ Bti = 1 << 17,
+ Mte = 1 << 18,
+ Ecv = 1 << 19,
+ Afp = 1 << 20,
+ Rpres = 1 << 21,
+ Mte3 = 1 << 22,
+ Sme = 1 << 23,
+ Sme_i16i64 = 1 << 24,
+ Sme_f64f64 = 1 << 25,
+ Sme_i8i32 = 1 << 26,
+ Sme_f16f32 = 1 << 27,
+ Sme_b16f32 = 1 << 28,
+ Sme_f32f32 = 1 << 29,
+ Sme_fa64 = 1 << 30,
+ Wfxt = 1UL << 31,
+ Ebf16 = 1UL << 32,
+ Sve_Ebf16 = 1UL << 33,
+ Cssc = 1UL << 34,
+ Rprfm = 1UL << 35,
+ Sve2p1 = 1UL << 36
+ }
+
+ public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0;
+ public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0;
+
+#endregion
+
+#region macOS
+
+ [LibraryImport("libSystem.dylib", SetLastError = true)]
+ private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize);
+
+ [SupportedOSPlatform("macos")]
+ private static bool CheckSysctlName(string name)
+ {
+ ulong size = sizeof(int);
+ if (sysctlbyname(name, out int val, ref size, IntPtr.Zero, 0) == 0 && size == sizeof(int))
+ {
+ return val != 0;
+ }
+ return false;
+ }
+
+        private static readonly string[] _sysctlNames = new string[]
+ {
+ "hw.optional.floatingpoint",
+ "hw.optional.AdvSIMD",
+ "hw.optional.arm.FEAT_FP16",
+ "hw.optional.arm.FEAT_AES",
+ "hw.optional.arm.FEAT_PMULL",
+ "hw.optional.arm.FEAT_LSE",
+ "hw.optional.armv8_crc32",
+ "hw.optional.arm.FEAT_SHA1",
+ "hw.optional.arm.FEAT_SHA256"
+ };
+
+ [Flags]
+ public enum MacOsFeatureFlags
+ {
+ Fp = 1 << 0,
+ AdvSimd = 1 << 1,
+ Fp16 = 1 << 2,
+ Aes = 1 << 3,
+ Pmull = 1 << 4,
+ Lse = 1 << 5,
+ Crc32 = 1 << 6,
+ Sha1 = 1 << 7,
+ Sha256 = 1 << 8
+ }
+
+ public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0;
+
+#endregion
+
+ public static bool SupportsAdvSimd => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Asimd) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.AdvSimd);
+ public static bool SupportsAes => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Aes) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Aes);
+ public static bool SupportsPmull => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Pmull) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Pmull);
+ public static bool SupportsLse => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Atomics) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Lse);
+ public static bool SupportsCrc32 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Crc32) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Crc32);
+ public static bool SupportsSha1 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha1) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha1);
+ public static bool SupportsSha256 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha2) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha256);
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
new file mode 100644
index 00000000..8695db90
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ struct IntrinsicInfo
+ {
+ public uint Inst { get; }
+ public IntrinsicType Type { get; }
+
+ public IntrinsicInfo(uint inst, IntrinsicType type)
+ {
+ Inst = inst;
+ Type = type;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
new file mode 100644
index 00000000..a309d56d
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs
@@ -0,0 +1,463 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class IntrinsicTable
+ {
+        private static readonly IntrinsicInfo[] _intrinTable;
+
+ static IntrinsicTable()
+ {
+ _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
+ Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+ Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise));
+ Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem));
+ Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt));
+ Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond));
+ Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond));
+ Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare));
+ Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare));
+ Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel));
+ Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtpsGp, new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv));
+ Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmaddS, new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlaVe, new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem));
+ Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi));
+ Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi));
+ Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem));
+ Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary));
+ Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem));
+ Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem));
+ Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi));
+ Add(Intrinsic.Arm64MrsFpcr, new IntrinsicInfo(0xd53b4400u, IntrinsicType.GetRegister));
+ Add(Intrinsic.Arm64MsrFpcr, new IntrinsicInfo(0xd51b4400u, IntrinsicType.SetRegister));
+ Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister));
+ Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister));
+ Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni));
+ Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise));
+ Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorBinaryBitwiseImm));
+ Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise));
+ Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise));
+ Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary));
+ Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary));
+ Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd));
+ Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd));
+ Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqdmullV, new IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem));
+ Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt));
+ Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs));
+ Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable));
+ Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable));
+ Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed));
+ Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed));
+ Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary));
+ Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr));
+ Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr));
+ Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem));
+ Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd));
+ Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem));
+ Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem));
+ Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl));
+ Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64UrshrV, new IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl));
+ Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary));
+ Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr));
+ Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr));
+ Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd));
+ Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd));
+ Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd));
+ Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd));
+ Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary));
+ Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary));
+ Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary));
+ }
+
+ private static void Add(Intrinsic intrin, IntrinsicInfo info)
+ {
+ _intrinTable[(int)intrin] = info;
+ }
+
+ public static IntrinsicInfo GetInfo(Intrinsic intrin)
+ {
+ return _intrinTable[(int)intrin];
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
new file mode 100644
index 00000000..800eca93
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs
@@ -0,0 +1,59 @@
+namespace ARMeilleure.CodeGen.Arm64
+{
+ enum IntrinsicType
+ {
+ ScalarUnary,
+ ScalarUnaryByElem,
+ ScalarBinary,
+ ScalarBinaryByElem,
+ ScalarBinaryFPByElem,
+ ScalarBinaryRd,
+ ScalarBinaryShl,
+ ScalarBinaryShr,
+ ScalarFcsel,
+ ScalarFmovi,
+ ScalarFPCompare,
+ ScalarFPCompareCond,
+ ScalarFPConv,
+ ScalarFPConvFixed,
+ ScalarFPConvFixedGpr,
+ ScalarFPConvGpr,
+ ScalarTernary,
+ ScalarTernaryFPRdByElem,
+ ScalarTernaryShlRd,
+ ScalarTernaryShrRd,
+
+ VectorUnary,
+ VectorUnaryBitwise,
+ VectorUnaryByElem,
+ VectorBinary,
+ VectorBinaryBitwise,
+ VectorBinaryBitwiseImm,
+ VectorBinaryByElem,
+ VectorBinaryFPByElem,
+ VectorBinaryRd,
+ VectorBinaryShl,
+ VectorBinaryShr,
+ VectorExt,
+ VectorFmovi,
+ VectorFPConvFixed,
+ VectorInsertByElem,
+ VectorLdSt,
+ VectorLdStSs,
+ VectorLookupTable,
+ VectorMovi,
+ VectorMvni,
+ VectorTernaryFPRdByElem,
+ VectorTernaryRd,
+ VectorTernaryRdBitwise,
+ VectorTernaryRdByElem,
+ VectorTernaryShlRd,
+ VectorTernaryShrRd,
+
+ Vector128Unary,
+ Vector128Binary,
+
+ GetRegister,
+ SetRegister
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
new file mode 100644
index 00000000..6ea9d239
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs
@@ -0,0 +1,896 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Arm64
+{
+ static class PreAllocator
+ {
+ private class ConstantDict
+ {
+ private readonly Dictionary<(ulong, OperandType), Operand> _constants;
+
+ public ConstantDict()
+ {
+ _constants = new Dictionary<(ulong, OperandType), Operand>();
+ }
+
+ public void Add(ulong value, OperandType type, Operand local)
+ {
+ _constants.Add((value, type), local);
+ }
+
+ public bool TryGetValue(ulong value, OperandType type, out Operand local)
+ {
+ return _constants.TryGetValue((value, type), out local);
+ }
+ }
+
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ Span<Operation> buffer = default;
+
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ ConstantDict constants = new ConstantDict();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ InsertConstantRegCopies(constants, block.Operations, node);
+ InsertDestructiveRegCopies(block.Operations, node);
+
+ switch (node.Instruction)
+ {
+ case Instruction.Call:
+ // Get the maximum number of arguments used on a call.
+                            // On Windows, when a struct is returned from the call,
+ // we also need to pass the pointer where the struct
+ // should be written on the first argument.
+ int argsCount = node.SourcesCount - 1;
+
+ if (node.Destination != default && node.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ InsertCallCopies(constants, block.Operations, node);
+ break;
+ case Instruction.CompareAndSwap:
+ case Instruction.CompareAndSwap16:
+ case Instruction.CompareAndSwap8:
+ nextNode = GenerateCompareAndSwap(block.Operations, node);
+ break;
+ case Instruction.LoadArgument:
+ nextNode = InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ break;
+ case Instruction.Return:
+ InsertReturnCopy(block.Operations, node);
+ break;
+ case Instruction.Tailcall:
+ InsertTailcallCopies(constants, block.Operations, stackAlloc, node, node);
+ break;
+ }
+ }
+ }
+ }
+
+ private static void InsertConstantRegCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0 || IsIntrinsicWithConst(node))
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand src1 = node.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+                    // - Insert a copy from the GPR to a vector register.
+                    // - Replace the constant use with the vector register.
+ src1 = AddFloatConstantCopy(constants, nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(node, src1.Value))
+ {
+ // Handle integer types.
+                    // Most ALU instructions accept a 32-bit immediate as the second operand.
+                    // We need to ensure the following:
+                    // - If the constant is on operand 1, we need to move it.
+                    // -- But first, we try to swap operands 1 and 2 if the instruction is commutative.
+                    // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+                    // - If the constant is on operand 2, we check if the instruction supports it,
+                    //   and if not, we add a copy there as well. 64-bit constants are usually not supported.
+ if (IsCommutative(node))
+ {
+ src2 = node.GetSource(1);
+
+ Operand temp = src1;
+
+ src1 = src2;
+ src2 = temp;
+
+ node.SetSource(0, src1);
+ node.SetSource(1, src2);
+ }
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddIntConstantCopy(constants, nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (node.SourcesCount < 2)
+ {
+ return;
+ }
+
+ src2 = node.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddFloatConstantCopy(constants, nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst, src2))
+ {
+ src2 = AddIntConstantCopy(constants, nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ }
+
+ if (node.SourcesCount < 3 ||
+ node.Instruction == Instruction.BranchIf ||
+ node.Instruction == Instruction.Compare ||
+ node.Instruction == Instruction.VectorInsert ||
+ node.Instruction == Instruction.VectorInsert16 ||
+ node.Instruction == Instruction.VectorInsert8)
+ {
+ return;
+ }
+
+ for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ Operand src = node.GetSource(srcIndex);
+
+ if (src.Kind == OperandKind.Constant)
+ {
+ if (!src.Type.IsInteger())
+ {
+ src = AddFloatConstantCopy(constants, nodes, node, src);
+
+ node.SetSource(srcIndex, src);
+ }
+ else
+ {
+ src = AddIntConstantCopy(constants, nodes, node, src);
+
+ node.SetSource(srcIndex, src);
+ }
+ }
+ }
+ }
+
+ private static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.Destination == default || node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0);
+
+ if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable)
+ {
+ bool useNewLocal = false;
+
+ for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ if (node.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+ // Dest is already being used as a source, so we need a new
+ // local to store the temporary value; otherwise the value in
+ // the dest local would be overwritten.
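+ // Illustrative IR shape: "d = add d, d" becomes
+ // "temp = copy d; temp = add temp, d; d = copy temp".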
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+ node.SetSource(0, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
+
+ node.SetSource(0, dest);
+ }
+ }
+ }
+
+ private static void InsertCallCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operation operation = node;
+
+ Operand dest = operation.Destination;
+
+ List<Operand> sources = new List<Operand>
+ {
+ operation.GetSource(0)
+ };
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
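+ // Classify each argument as register- or stack-passed. Illustrative, assuming
+ // the usual eight AArch64 argument GPRs: a V128 value needs two consecutive
+ // GPRs (one per 64-bit half), so it goes to the stack once fewer than two remain.
+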
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ Operand offset = Const(stackOffset);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, spillOp));
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
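+ // A V128 return value comes back split across the two integer return
+ // registers, so reassemble it into the destination vector after the call.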
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ operation.Destination = retReg;
+ }
+ }
+
+ operation.SetSources(sources.ToArray());
+ }
+
+ private static void InsertTailcallCopies(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ StackAllocator stackAlloc,
+ Operation node,
+ Operation operation)
+ {
+ List<Operand> sources = new List<Operand>
+ {
+ operation.GetSource(0)
+ };
+
+ int argsCount = operation.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ // Handle arguments passed on registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = operation.GetSource(1 + index);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+ }
+
+ // The target address must be in the return register, since we
+ // don't return anything and it is guaranteed not to be a
+ // callee-saved register (which would be trashed by the epilogue).
+ Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = tcAddress;
+
+ operation.SetSources(sources.ToArray());
+ }
+
+ private static Operation GenerateCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand expected = node.GetSource(1);
+
+ if (expected.Type == OperandType.V128)
+ {
+ Operand dest = node.Destination;
+ Operand expectedLow = Local(OperandType.I64);
+ Operand expectedHigh = Local(OperandType.I64);
+ Operand desiredLow = Local(OperandType.I64);
+ Operand desiredHigh = Local(OperandType.I64);
+ Operand actualLow = Local(OperandType.I64);
+ Operand actualHigh = Local(OperandType.I64);
+
+ Operand address = node.GetSource(0);
+ Operand desired = node.GetSource(2);
+
+ void SplitOperand(Operand source, Operand low, Operand high)
+ {
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
+ }
+
+ SplitOperand(expected, expectedLow, expectedHigh);
+ SplitOperand(desired, desiredLow, desiredHigh);
+
+ Operation operation = node;
+
+ // Update the sources and destinations with the split 64-bit halves of the whole 128-bit values.
+ // We also need two additional registers that will be used to store temporary information.
+ operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
+ operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
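+
+ // After this rewrite the operation computes:
+ // (actualLow, actualHigh, temp0, temp1) = CAS(address, expectedLow, expectedHigh, desiredLow, desiredHigh)
+ // which the code generator can lower to a load-exclusive/store-exclusive loop
+ // (e.g. LDAXP/STLXP on AArch64; illustrative).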
+
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operands.
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+
+ // Assemble the result vector from the two 64-bit values read from the memory location.
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
+ node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
+ }
+ else
+ {
+ // We need an additional register where the store result will be written.
+ node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
+
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operands.
+ Operation operation = node;
+
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+ }
+
+ return node.ListNext;
+ }
+
+ private static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
+ }
+ else
+ {
+ Operand retReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+ }
+
+ private static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int index = source.AsInt32();
+
+ int intCount = 0;
+ int vecCount = 0;
+
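+ // Count how many argument registers the preceding arguments consume, so we
+ // know which register (if any) holds argument 'index'. A V128 argument
+ // occupies two GPR slots, one per 64-bit half.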
+ for (int cIndex = 0; cIndex < index; cIndex++)
+ {
+ OperandType argType = cctx.FuncArgTypes[cIndex];
+
+ if (argType.IsInteger())
+ {
+ intCount++;
+ }
+ else if (argType == OperandType.V128)
+ {
+ intCount += 2;
+ }
+ else
+ {
+ vecCount++;
+ }
+ }
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount();
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount();
+ }
+ else
+ {
+ passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount();
+ }
+
+ if (passOnReg)
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand pArg = Local(OperandType.V128);
+
+ Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+ Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+ Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+ Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyH);
+ cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+ preservedArgs[index] = pArg;
+ }
+ else
+ {
+ Operand pArg = Local(dest.Type);
+
+ Operand argReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+ }
+
+ Operation nextNode;
+
+ if (dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ private static Operand AddFloatConstantCopy(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ Operation node,
+ Operand source)
+ {
+ Operand temp = Local(source.Type);
+
+ Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source));
+
+ Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ private static Operand AddIntConstantCopy(
+ ConstantDict constants,
+ IntrusiveList<Operation> nodes,
+ Operation node,
+ Operand source)
+ {
+ if (constants.TryGetValue(source.Value, source.Type, out Operand temp))
+ {
+ return temp;
+ }
+
+ temp = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, temp, source);
+
+ nodes.AddBefore(node, copyOp);
+
+ constants.Add(source.Value, source.Type, temp);
+
+ return temp;
+ }
+
+ private static Operand GetIntConst(Operand value)
+ {
+ if (value.Type == OperandType.FP32)
+ {
+ return Const(value.AsInt32());
+ }
+ else if (value.Type == OperandType.FP64)
+ {
+ return Const(value.AsInt64());
+ }
+
+ return value;
+ }
+
+ private static void Delete(IntrusiveList<Operation> nodes, Operation node)
+ {
+ node.Destination = default;
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ nodes.Remove(node);
+ }
+
+ private static Operand Gpr(int register, OperandType type)
+ {
+ return Register(register, RegisterType.Integer, type);
+ }
+
+ private static Operand Xmm(int register, OperandType type)
+ {
+ return Register(register, RegisterType.Vector, type);
+ }
+
+ private static bool IsSameOperandDestSrc1(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Extended:
+ return IsSameOperandDestSrc1(operation.Intrinsic);
+ case Instruction.VectorInsert:
+ case Instruction.VectorInsert16:
+ case Instruction.VectorInsert8:
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool IsSameOperandDestSrc1(Intrinsic intrinsic)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ return info.Type == IntrinsicType.ScalarBinaryRd ||
+ info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
+ info.Type == IntrinsicType.ScalarTernaryShlRd ||
+ info.Type == IntrinsicType.ScalarTernaryShrRd ||
+ info.Type == IntrinsicType.VectorBinaryRd ||
+ info.Type == IntrinsicType.VectorInsertByElem ||
+ info.Type == IntrinsicType.VectorTernaryRd ||
+ info.Type == IntrinsicType.VectorTernaryRdBitwise ||
+ info.Type == IntrinsicType.VectorTernaryFPRdByElem ||
+ info.Type == IntrinsicType.VectorTernaryRdByElem ||
+ info.Type == IntrinsicType.VectorTernaryShlRd ||
+ info.Type == IntrinsicType.VectorTernaryShrRd;
+ }
+
+ private static bool HasConstSrc1(Operation node, ulong value)
+ {
+ switch (node.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Subtract:
+ // The immediate encoding of those instructions does not allow Rn to be
+ // XZR (it will be SP instead), so we can't allow a constant Rn in this case.
+ return value == 0 && NotConstOrConst0(node.GetSource(1));
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseNot:
+ case Instruction.BitwiseOr:
+ case Instruction.ByteSwap:
+ case Instruction.CountLeadingZeros:
+ case Instruction.Multiply:
+ case Instruction.Negate:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ return value == 0;
+ case Instruction.Copy:
+ case Instruction.LoadArgument:
+ case Instruction.Spill:
+ case Instruction.SpillArg:
+ return true;
+ case Instruction.Extended:
+ return value == 0;
+ }
+
+ return false;
+ }
+
+ private static bool NotConstOrConst0(Operand operand)
+ {
+ return operand.Kind != OperandKind.Constant || operand.Value == 0;
+ }
+
+ private static bool HasConstSrc2(Instruction inst, Operand operand)
+ {
+ ulong value = operand.Value;
+
+ switch (inst)
+ {
+ case Instruction.Add:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Subtract:
+ return ConstFitsOnUImm12Sh(value);
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _);
+ case Instruction.Multiply:
+ case Instruction.Store:
+ case Instruction.Store16:
+ case Instruction.Store8:
+ return value == 0;
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.VectorExtract:
+ case Instruction.VectorExtract16:
+ case Instruction.VectorExtract8:
+ return true;
+ case Instruction.Extended:
+ // TODO: Check if the actual intrinsic is supposed to have consts here?
+ // Right now we only hit this case for fixed-point int <-> FP conversion instructions.
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool IsCommutative(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.Multiply:
+ return true;
+
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var compType = (Comparison)comp.AsInt32();
+
+ return compType == Comparison.Equal || compType == Comparison.NotEqual;
+ }
+ }
+
+ return false;
+ }
+
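+ // AArch64 ADD/SUB (immediate) accepts a 12-bit unsigned immediate, optionally
+ // shifted left by 12 bits. Illustrative: 0xabc and 0xabc000 are encodable,
+ // while 0xabc001 is not and must be materialized into a register.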
+ private static bool ConstFitsOnUImm12Sh(ulong value)
+ {
+ return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0;
+ }
+
+ private static bool IsIntrinsicWithConst(Operation operation)
+ {
+ bool isIntrinsic = IsIntrinsic(operation.Instruction);
+
+ if (isIntrinsic)
+ {
+ Intrinsic intrinsic = operation.Intrinsic;
+ IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask));
+
+ // Those have integer inputs that don't support consts.
+ return info.Type != IntrinsicType.ScalarFPConvGpr &&
+ info.Type != IntrinsicType.ScalarFPConvFixedGpr &&
+ info.Type != IntrinsicType.SetRegister;
+ }
+
+ return false;
+ }
+
+ private static bool IsIntrinsic(Instruction inst)
+ {
+ return inst == Instruction.Extended;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/CompiledFunction.cs b/src/ARMeilleure/CodeGen/CompiledFunction.cs
new file mode 100644
index 00000000..0560bf2e
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/CompiledFunction.cs
@@ -0,0 +1,68 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Translation.Cache;
+using System;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen
+{
+ /// <summary>
+ /// Represents a compiled function.
+ /// </summary>
+ readonly struct CompiledFunction
+ {
+ /// <summary>
+ /// Gets the machine code of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public byte[] Code { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Unwinding.UnwindInfo"/> of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public UnwindInfo UnwindInfo { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Linking.RelocInfo"/> of the <see cref="CompiledFunction"/>.
+ /// </summary>
+ public RelocInfo RelocInfo { get; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="CompiledFunction"/> struct with the specified machine code,
+ /// unwind info and relocation info.
+ /// </summary>
+ /// <param name="code">Machine code</param>
+ /// <param name="unwindInfo">Unwind info</param>
+ /// <param name="relocInfo">Relocation info</param>
+ internal CompiledFunction(byte[] code, UnwindInfo unwindInfo, RelocInfo relocInfo)
+ {
+ Code = code;
+ UnwindInfo = unwindInfo;
+ RelocInfo = relocInfo;
+ }
+
+ /// <summary>
+ /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type
+ /// <typeparamref name="T"/> pointing to the mapped function.
+ /// </summary>
+ /// <typeparam name="T">Type of delegate</typeparam>
+ /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns>
+ public T Map<T>()
+ {
+ return MapWithPointer<T>(out _);
+ }
+
+ /// <summary>
+ /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type
+ /// <typeparamref name="T"/> pointing to the mapped function.
+ /// </summary>
+ /// <typeparam name="T">Type of delegate</typeparam>
+ /// <param name="codePointer">Pointer to the function code in memory</param>
+ /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns>
+ public T MapWithPointer<T>(out IntPtr codePointer)
+ {
+ codePointer = JitCache.Map(this);
+
+ return Marshal.GetDelegateForFunctionPointer<T>(codePointer);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs
new file mode 100644
index 00000000..a27bfded
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs
@@ -0,0 +1,38 @@
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents a relocation.
+ /// </summary>
+ readonly struct RelocEntry
+ {
+ public const int Stride = 13; // Bytes.
+
+ /// <summary>
+ /// Gets the position of the relocation.
+ /// </summary>
+ public int Position { get; }
+
+ /// <summary>
+ /// Gets the <see cref="Symbol"/> of the relocation.
+ /// </summary>
+ public Symbol Symbol { get; }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="RelocEntry"/> struct with the specified position and
+ /// <see cref="Symbol"/>.
+ /// </summary>
+ /// <param name="position">Position of relocation</param>
+ /// <param name="symbol">Symbol of relocation</param>
+ public RelocEntry(int position, Symbol symbol)
+ {
+ Position = position;
+ Symbol = symbol;
+ }
+
+ /// <inheritdoc/>
+ public override string ToString()
+ {
+ return $"({nameof(Position)} = {Position}, {nameof(Symbol)} = {Symbol})";
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs
new file mode 100644
index 00000000..caaf08e3
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs
@@ -0,0 +1,32 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents relocation information about a <see cref="CompiledFunction"/>.
+ /// </summary>
+ readonly struct RelocInfo
+ {
+ /// <summary>
+ /// Gets an empty <see cref="RelocInfo"/>.
+ /// </summary>
+ public static RelocInfo Empty { get; } = new RelocInfo(null);
+
+ private readonly RelocEntry[] _entries;
+
+ /// <summary>
+ /// Gets the set of <see cref="RelocEntry"/>.
+ /// </summary>
+ public ReadOnlySpan<RelocEntry> Entries => _entries;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="RelocInfo"/> struct with the specified set of
+ /// <see cref="RelocEntry"/>.
+ /// </summary>
+ /// <param name="entries">Set of <see cref="RelocInfo"/> to use</param>
+ public RelocInfo(RelocEntry[] entries)
+ {
+ _entries = entries;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Linking/Symbol.cs b/src/ARMeilleure/CodeGen/Linking/Symbol.cs
new file mode 100644
index 00000000..39e0c3eb
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/Symbol.cs
@@ -0,0 +1,99 @@
+using System;
+
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Represents a symbol.
+ /// </summary>
+ readonly struct Symbol
+ {
+ private readonly ulong _value;
+
+ /// <summary>
+ /// Gets the <see cref="SymbolType"/> of the <see cref="Symbol"/>.
+ /// </summary>
+ public SymbolType Type { get; }
+
+ /// <summary>
+ /// Gets the value of the <see cref="Symbol"/>.
+ /// </summary>
+ /// <exception cref="InvalidOperationException"><see cref="Type"/> is <see cref="SymbolType.None"/></exception>
+ public ulong Value
+ {
+ get
+ {
+ if (Type == SymbolType.None)
+ {
+ ThrowSymbolNone();
+ }
+
+ return _value;
+ }
+ }
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="Symbol"/> structure with the specified <see cref="SymbolType"/> and value.
+ /// </summary>
+ /// <param name="type">Type of symbol</param>
+ /// <param name="value">Value of symbol</param>
+ public Symbol(SymbolType type, ulong value)
+ {
+ (Type, _value) = (type, value);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> instances are equal.
+ /// </summary>
+ /// <param name="a">First instance</param>
+ /// <param name="b">Second instance</param>
+ /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns>
+ public static bool operator ==(Symbol a, Symbol b)
+ {
+ return a.Equals(b);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> instances are not equal.
+ /// </summary>
+ /// <param name="a">First instance</param>
+ /// <param name="b">Second instance</param>
+ /// <returns><see langword="true"/> if not equal; otherwise <see langword="false"/></returns>
+ public static bool operator !=(Symbol a, Symbol b)
+ {
+ return !(a == b);
+ }
+
+ /// <summary>
+ /// Determines if the specified <see cref="Symbol"/> is equal to this <see cref="Symbol"/> instance.
+ /// </summary>
+ /// <param name="other">Other <see cref="Symbol"/> instance</param>
+ /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns>
+ public bool Equals(Symbol other)
+ {
+ return other.Type == Type && other._value == _value;
+ }
+
+ /// <inheritdoc/>
+ public override bool Equals(object obj)
+ {
+ return obj is Symbol sym && Equals(sym);
+ }
+
+ /// <inheritdoc/>
+ public override int GetHashCode()
+ {
+ return HashCode.Combine(Type, _value);
+ }
+
+ /// <inheritdoc/>
+ public override string ToString()
+ {
+ return $"{Type}:{_value}";
+ }
+
+ private static void ThrowSymbolNone()
+ {
+ throw new InvalidOperationException("Symbol refers to nothing.");
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Linking/SymbolType.cs b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs
new file mode 100644
index 00000000..b05b6969
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs
@@ -0,0 +1,28 @@
+namespace ARMeilleure.CodeGen.Linking
+{
+ /// <summary>
+ /// Types of <see cref="Symbol"/>.
+ /// </summary>
+ enum SymbolType : byte
+ {
+ /// <summary>
+ /// Refers to nothing, i.e. no symbol.
+ /// </summary>
+ None,
+
+ /// <summary>
+ /// Refers to an entry in <see cref="Translation.Delegates"/>.
+ /// </summary>
+ DelegateTable,
+
+ /// <summary>
+ /// Refers to an entry in <see cref="Translation.Translator.FunctionTable"/>.
+ /// </summary>
+ FunctionTable,
+
+ /// <summary>
+ /// Refers to a special symbol which is handled by <see cref="Translation.PTC.Ptc.PatchCode"/>.
+ /// </summary>
+ Special
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
new file mode 100644
index 00000000..9e243d37
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs
@@ -0,0 +1,72 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class BlockPlacement
+ {
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ bool update = false;
+
+ BasicBlock block;
+ BasicBlock nextBlock;
+
+ BasicBlock lastBlock = cfg.Blocks.Last;
+
+ // Move cold blocks to the end of the list, so that they are emitted away from hot code.
+ for (block = cfg.Blocks.First; block != null; block = nextBlock)
+ {
+ nextBlock = block.ListNext;
+
+ if (block.Frequency == BasicBlockFrequency.Cold)
+ {
+ cfg.Blocks.Remove(block);
+ cfg.Blocks.AddLast(block);
+ }
+
+ if (block == lastBlock)
+ {
+ break;
+ }
+ }
+
+ for (block = cfg.Blocks.First; block != null; block = nextBlock)
+ {
+ nextBlock = block.ListNext;
+
+ if (block.SuccessorsCount == 2)
+ {
+ Operation branchOp = block.Operations.Last;
+
+ Debug.Assert(branchOp.Instruction == Instruction.BranchIf);
+
+ BasicBlock falseSucc = block.GetSuccessor(0);
+ BasicBlock trueSucc = block.GetSuccessor(1);
+
+ // If the true successor is the next block in the list, invert the condition. We avoid extra
+ // branching by making the true side the fallthrough (i.e., converting it to the false side).
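+ // Illustrative: "BranchIf x, y, Less -> T" with T immediately following becomes
+ // "BranchIf x, y, GreaterOrEqual -> F", so execution falls through to T
+ // instead of branching to it.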
+ if (trueSucc == block.ListNext)
+ {
+ Comparison comp = (Comparison)branchOp.GetSource(2).AsInt32();
+ Comparison compInv = comp.Invert();
+
+ branchOp.SetSource(2, Const((int)compInv));
+
+ block.SetSuccessor(0, trueSucc);
+ block.SetSuccessor(1, falseSucc);
+
+ update = true;
+ }
+ }
+ }
+
+ if (update)
+ {
+ cfg.Update();
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
new file mode 100644
index 00000000..c5a22a53
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs
@@ -0,0 +1,346 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class ConstantFolding
+ {
+ public static void RunPass(Operation operation)
+ {
+ if (operation.Destination == default || operation.SourcesCount == 0)
+ {
+ return;
+ }
+
+ if (!AreAllSourcesConstant(operation))
+ {
+ return;
+ }
+
+ OperandType type = operation.Destination.Type;
+
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ if (operation.GetSource(0).Relocatable ||
+ operation.GetSource(1).Relocatable)
+ {
+ break;
+ }
+
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x + y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x + y);
+ }
+ break;
+
+ case Instruction.BitwiseAnd:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x & y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x & y);
+ }
+ break;
+
+ case Instruction.BitwiseExclusiveOr:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x ^ y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x ^ y);
+ }
+ break;
+
+ case Instruction.BitwiseNot:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => ~x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => ~x);
+ }
+ break;
+
+ case Instruction.BitwiseOr:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x | y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x | y);
+ }
+ break;
+
+ case Instruction.ConvertI64ToI32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ break;
+
+ case Instruction.Compare:
+ if (type == OperandType.I32 &&
+ operation.GetSource(0).Type == type &&
+ operation.GetSource(1).Type == type)
+ {
+ switch ((Comparison)operation.GetSource(2).Value)
+ {
+ case Comparison.Equal:
+ EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0);
+ break;
+ case Comparison.NotEqual:
+ EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0);
+ break;
+ case Comparison.Greater:
+ EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0);
+ break;
+ case Comparison.LessOrEqual:
+ EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0);
+ break;
+ case Comparison.GreaterUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0);
+ break;
+ case Comparison.LessOrEqualUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0);
+ break;
+ case Comparison.GreaterOrEqual:
+ EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0);
+ break;
+ case Comparison.Less:
+ EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0);
+ break;
+ case Comparison.GreaterOrEqualUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0);
+ break;
+ case Comparison.LessUI:
+ EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0);
+ break;
+ }
+ }
+ break;
+
+ case Instruction.Copy:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => x);
+ }
+ break;
+
+ case Instruction.Divide:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => y != 0 ? x / y : 0);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => y != 0 ? x / y : 0);
+ }
+ break;
+
+ case Instruction.DivideUI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => y != 0 ? (long)((ulong)x / (ulong)y) : 0);
+ }
+ break;
+
+ case Instruction.Multiply:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x * y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x * y);
+ }
+ break;
+
+ case Instruction.Negate:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => -x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => -x);
+ }
+ break;
+
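+ // Note (illustrative): C# '<<' and '>>' mask the shift count (by 31 for int,
+ // 63 for long), so the folded shifts below inherit that masking behavior.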
+ case Instruction.ShiftLeft:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x << y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x << (int)y);
+ }
+ break;
+
+ case Instruction.ShiftRightSI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x >> y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x >> (int)y);
+ }
+ break;
+
+ case Instruction.ShiftRightUI:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => (int)((uint)x >> y));
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => (long)((ulong)x >> (int)y));
+ }
+ break;
+
+ case Instruction.SignExtend16:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (short)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (short)x);
+ }
+ break;
+
+ case Instruction.SignExtend32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (int)x);
+ }
+ break;
+
+ case Instruction.SignExtend8:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (sbyte)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (sbyte)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend16:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (ushort)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (ushort)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend32:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (uint)x);
+ }
+ break;
+
+ case Instruction.ZeroExtend8:
+ if (type == OperandType.I32)
+ {
+ EvaluateUnaryI32(operation, (x) => (byte)x);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateUnaryI64(operation, (x) => (byte)x);
+ }
+ break;
+
+ case Instruction.Subtract:
+ if (type == OperandType.I32)
+ {
+ EvaluateBinaryI32(operation, (x, y) => x - y);
+ }
+ else if (type == OperandType.I64)
+ {
+ EvaluateBinaryI64(operation, (x, y) => x - y);
+ }
+ break;
+ }
+ }
+
+ private static bool AreAllSourcesConstant(Operation operation)
+ {
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ Operand srcOp = operation.GetSource(index);
+
+ if (srcOp.Kind != OperandKind.Constant)
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private static void EvaluateUnaryI32(Operation operation, Func<int, int> op)
+ {
+ int x = operation.GetSource(0).AsInt32();
+
+ operation.TurnIntoCopy(Const(op(x)));
+ }
+
+ private static void EvaluateUnaryI64(Operation operation, Func<long, long> op)
+ {
+ long x = operation.GetSource(0).AsInt64();
+
+ operation.TurnIntoCopy(Const(op(x)));
+ }
+
+ private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op)
+ {
+ int x = operation.GetSource(0).AsInt32();
+ int y = operation.GetSource(1).AsInt32();
+
+ operation.TurnIntoCopy(Const(op(x, y)));
+ }
+
+ private static void EvaluateBinaryI64(Operation operation, Func<long, long, long> op)
+ {
+ long x = operation.GetSource(0).AsInt64();
+ long y = operation.GetSource(1).AsInt64();
+
+ operation.TurnIntoCopy(Const(op(x, y)));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
new file mode 100644
index 00000000..a45bb455
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs
@@ -0,0 +1,252 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Optimizer
+ {
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ // Scratch buffer used to store uses.
+ Span<Operation> buffer = default;
+
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (BasicBlock block = cfg.Blocks.Last; block != null; block = block.ListPrevious)
+ {
+ Operation node;
+ Operation prevNode;
+
+ for (node = block.Operations.Last; node != default; node = prevNode)
+ {
+ prevNode = node.ListPrevious;
+
+ if (IsUnused(node))
+ {
+ RemoveNode(block, node);
+
+ modified = true;
+
+ continue;
+ }
+ else if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ ConstantFolding.RunPass(node);
+ Simplification.RunPass(node);
+
+ if (DestIsSingleLocalVar(node))
+ {
+ if (IsPropagableCompare(node))
+ {
+ modified |= PropagateCompare(ref buffer, node);
+
+ if (modified && IsUnused(node))
+ {
+ RemoveNode(block, node);
+ }
+ }
+ else if (IsPropagableCopy(node))
+ {
+ PropagateCopy(ref buffer, node);
+
+ RemoveNode(block, node);
+
+ modified = true;
+ }
+ }
+ }
+ }
+ }
+ while (modified);
+ }
+
+ public static void RemoveUnusedNodes(ControlFlowGraph cfg)
+ {
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (BasicBlock block = cfg.Blocks.Last; block != null; block = block.ListPrevious)
+ {
+ Operation node;
+ Operation prevNode;
+
+ for (node = block.Operations.Last; node != default; node = prevNode)
+ {
+ prevNode = node.ListPrevious;
+
+ if (IsUnused(node))
+ {
+ RemoveNode(block, node);
+
+ modified = true;
+ }
+ }
+ }
+ }
+ while (modified);
+ }
+
+ private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp)
+ {
+ // Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form
+ // of:
+ //
+ // - BranchIf %x, 0x0, Equal ;; i.e. BranchIfFalse %x
+ // - BranchIf %x, 0x0, NotEqual ;; i.e. BranchIfTrue %x
+ //
+ // The commutative property of Equal and NotEqual is taken into consideration as well.
+ //
+ // For example:
+ //
+ // %x = Compare %a, %b, comp
+ // BranchIf %x, 0x0, NotEqual
+ //
+ // =>
+ //
+ // BranchIf %a, %b, comp
+
+ static bool IsZeroBranch(Operation operation, out Comparison compType)
+ {
+ compType = Comparison.Equal;
+
+ if (operation.Instruction != Instruction.BranchIf)
+ {
+ return false;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand comp = operation.GetSource(2);
+
+ compType = (Comparison)comp.AsInt32();
+
+ return (src1.Kind == OperandKind.Constant && src1.Value == 0) ||
+ (src2.Kind == OperandKind.Constant && src2.Value == 0);
+ }
+
+ bool modified = false;
+
+ Operand dest = compOp.Destination;
+ Operand src1 = compOp.GetSource(0);
+ Operand src2 = compOp.GetSource(1);
+ Operand comp = compOp.GetSource(2);
+
+ Comparison compType = (Comparison)comp.AsInt32();
+
+ Span<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ // The use is a BranchIf that compares against a constant 0 on its LHS or RHS source operand.
+ if (IsZeroBranch(use, out Comparison otherCompType))
+ {
+ Comparison propCompType;
+
+ if (otherCompType == Comparison.NotEqual)
+ {
+ propCompType = compType;
+ }
+ else if (otherCompType == Comparison.Equal)
+ {
+ propCompType = compType.Invert();
+ }
+ else
+ {
+ continue;
+ }
+
+ use.SetSource(0, src1);
+ use.SetSource(1, src2);
+ use.SetSource(2, Const((int)propCompType));
+
+ modified = true;
+ }
+ }
+
+ return modified;
+ }
+
+ private static void PropagateCopy(ref Span<Operation> buffer, Operation copyOp)
+ {
+ // Propagate copy source operand to all uses of the destination operand.
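+ // Illustrative: given "y = copy x" and a later "z = add y, w", the use of y
+ // is replaced with x; the now-dead copy is removed by the caller.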
+ Operand dest = copyOp.Destination;
+ Operand source = copyOp.GetSource(0);
+
+ Span<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ for (int index = 0; index < use.SourcesCount; index++)
+ {
+ if (use.GetSource(index) == dest)
+ {
+ use.SetSource(index, source);
+ }
+ }
+ }
+ }
+
+ private static void RemoveNode(BasicBlock block, Operation node)
+ {
+ // Remove the node from the operations list, and also remove it
+ // from the use lists of all the operands that it uses.
+ block.Operations.Remove(node);
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ Debug.Assert(node.Destination == default || node.Destination.UsesCount == 0);
+
+ node.Destination = default;
+ }
+
+ private static bool IsUnused(Operation node)
+ {
+ return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node);
+ }
+
+ private static bool DestIsSingleLocalVar(Operation node)
+ {
+ return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable;
+ }
+
+ private static bool HasSideEffects(Operation node)
+ {
+ return node.Instruction == Instruction.Call
+ || node.Instruction == Instruction.Tailcall
+ || node.Instruction == Instruction.CompareAndSwap
+ || node.Instruction == Instruction.CompareAndSwap16
+ || node.Instruction == Instruction.CompareAndSwap8;
+ }
+
+ private static bool IsPropagableCompare(Operation operation)
+ {
+ return operation.Instruction == Instruction.Compare;
+ }
+
+ private static bool IsPropagableCopy(Operation operation)
+ {
+ if (operation.Instruction != Instruction.Copy)
+ {
+ return false;
+ }
+
+ return operation.Destination.Type == operation.GetSource(0).Type;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs
new file mode 100644
index 00000000..a439d642
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs
@@ -0,0 +1,183 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class Simplification
+ {
+ public static void RunPass(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ if (operation.GetSource(0).Relocatable ||
+ operation.GetSource(1).Relocatable)
+ {
+ break;
+ }
+
+ TryEliminateBinaryOpCommutative(operation, 0);
+ break;
+
+ case Instruction.BitwiseAnd:
+ TryEliminateBitwiseAnd(operation);
+ break;
+
+ case Instruction.BitwiseOr:
+ TryEliminateBitwiseOr(operation);
+ break;
+
+ case Instruction.BitwiseExclusiveOr:
+ TryEliminateBitwiseExclusiveOr(operation);
+ break;
+
+ case Instruction.ConditionalSelect:
+ TryEliminateConditionalSelect(operation);
+ break;
+
+ case Instruction.Divide:
+ TryEliminateBinaryOpY(operation, 1);
+ break;
+
+ case Instruction.Multiply:
+ TryEliminateBinaryOpCommutative(operation, 1);
+ break;
+
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.Subtract:
+ TryEliminateBinaryOpY(operation, 0);
+ break;
+ }
+ }
+
+ private static void TryEliminateBitwiseAnd(Operation operation)
+ {
+ // Try to recognize and optimize those 3 patterns (in order):
+ // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y,
+ // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, AllOnes(x.Type)))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, AllOnes(y.Type)))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ else if (IsConstEqual(x, 0) || IsConstEqual(y, 0))
+ {
+ operation.TurnIntoCopy(Const(x.Type, 0));
+ }
+ }
+
+ private static void TryEliminateBitwiseOr(Operation operation)
+ {
+ // Try to recognize and optimize those 3 patterns (in order):
+ // x | 0x00000000 == x, 0x00000000 | y == y,
+ // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, 0))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, 0))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ else if (IsConstEqual(x, AllOnes(x.Type)) || IsConstEqual(y, AllOnes(y.Type)))
+ {
+ operation.TurnIntoCopy(Const(AllOnes(x.Type)));
+ }
+ }
+
+ private static void TryEliminateBitwiseExclusiveOr(Operation operation)
+ {
+ // Try to recognize and optimize those 2 patterns (in order):
+ // x ^ y == 0x00000000 when x == y
+ // 0x00000000 ^ y == y, x ^ 0x00000000 == x
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (x == y && x.Type.IsInteger())
+ {
+ operation.TurnIntoCopy(Const(x.Type, 0));
+ }
+ else
+ {
+ TryEliminateBinaryOpCommutative(operation, 0);
+ }
+ }
+
+ private static void TryEliminateBinaryOpY(Operation operation, ulong comparand)
+ {
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(y, comparand))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ }
+
+ private static void TryEliminateBinaryOpCommutative(Operation operation, ulong comparand)
+ {
+ Operand x = operation.GetSource(0);
+ Operand y = operation.GetSource(1);
+
+ if (IsConstEqual(x, comparand))
+ {
+ operation.TurnIntoCopy(y);
+ }
+ else if (IsConstEqual(y, comparand))
+ {
+ operation.TurnIntoCopy(x);
+ }
+ }
+
+ private static void TryEliminateConditionalSelect(Operation operation)
+ {
+ Operand cond = operation.GetSource(0);
+
+ if (cond.Kind != OperandKind.Constant)
+ {
+ return;
+ }
+
+ // The condition is constant, so we can turn the operation into a copy and
+ // select the source based on the condition value.
+ int srcIndex = cond.Value != 0 ? 1 : 2;
+
+ Operand source = operation.GetSource(srcIndex);
+
+ operation.TurnIntoCopy(source);
+ }
+
+ private static bool IsConstEqual(Operand operand, ulong comparand)
+ {
+ if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger())
+ {
+ return false;
+ }
+
+ return operand.Value == comparand;
+ }
+
+ private static ulong AllOnes(OperandType type)
+ {
+ switch (type)
+ {
+ case OperandType.I32: return ~0U;
+ case OperandType.I64: return ~0UL;
+ }
+
+ throw new ArgumentException("Invalid operand type \"" + type + "\".");
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs
new file mode 100644
index 00000000..e94df159
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs
@@ -0,0 +1,83 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.Optimizations
+{
+ static class TailMerge
+ {
+ public static void RunPass(in CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ BasicBlock mergedReturn = new(cfg.Blocks.Count);
+
+ Operand returnValue;
+ Operation returnOp;
+
+ if (cctx.FuncReturnType == OperandType.None)
+ {
+ returnValue = default;
+ returnOp = Operation(Instruction.Return, default);
+ }
+ else
+ {
+ returnValue = cfg.AllocateLocal(cctx.FuncReturnType);
+ returnOp = Operation(Instruction.Return, default, returnValue);
+ }
+
+ mergedReturn.Frequency = BasicBlockFrequency.Cold;
+ mergedReturn.Operations.AddLast(returnOp);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ Operation op = block.Operations.Last;
+
+ if (op != default && op.Instruction == Instruction.Return)
+ {
+ block.Operations.Remove(op);
+
+ if (cctx.FuncReturnType == OperandType.None)
+ {
+ PrepareMerge(block, mergedReturn);
+ }
+ else
+ {
+ Operation copyOp = Operation(Instruction.Copy, returnValue, op.GetSource(0));
+
+ PrepareMerge(block, mergedReturn).Append(copyOp);
+ }
+ }
+ }
+
+ cfg.Blocks.AddLast(mergedReturn);
+ cfg.Update();
+ }
+
+ private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to)
+ {
+ BasicBlock fromPred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null;
+
+ // If the block is empty, we can try to append to the predecessor and avoid unnecessary jumps.
+ if (from.Operations.Count == 0 && fromPred != null && fromPred.SuccessorsCount == 1)
+ {
+ for (int i = 0; i < fromPred.SuccessorsCount; i++)
+ {
+ if (fromPred.GetSuccessor(i) == from)
+ {
+ fromPred.SetSuccessor(i, to);
+ }
+ }
+
+ // NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it.
+ return fromPred;
+ }
+ else
+ {
+ from.AddSuccessor(to);
+
+ return from;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs
new file mode 100644
index 00000000..53f279fb
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs
@@ -0,0 +1,57 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen
+{
+ static class PreAllocatorCommon
+ {
+ public static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value)
+ {
+ ReadOnlySpan<Operation> uses = dest.GetUses(ref buffer);
+
+ foreach (Operation use in uses)
+ {
+ for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++)
+ {
+ Operand useSrc = use.GetSource(srcIndex);
+
+ if (useSrc == dest)
+ {
+ use.SetSource(srcIndex, value);
+ }
+ else if (useSrc.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memoryOp = useSrc.GetMemory();
+
+ Operand baseAddr = memoryOp.BaseAddress;
+ Operand index = memoryOp.Index;
+ bool changed = false;
+
+ if (baseAddr == dest)
+ {
+ baseAddr = value;
+ changed = true;
+ }
+
+ if (index == dest)
+ {
+ index = value;
+ changed = true;
+ }
+
+ if (changed)
+ {
+ use.SetSource(srcIndex, MemoryOp(
+ useSrc.Type,
+ baseAddr,
+ index,
+ memoryOp.Scale,
+ memoryOp.Displacement));
+ }
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
new file mode 100644
index 00000000..43e5c7e2
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs
@@ -0,0 +1,19 @@
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ readonly struct AllocationResult
+ {
+ public int IntUsedRegisters { get; }
+ public int VecUsedRegisters { get; }
+ public int SpillRegionSize { get; }
+
+ public AllocationResult(
+ int intUsedRegisters,
+ int vecUsedRegisters,
+ int spillRegionSize)
+ {
+ IntUsedRegisters = intUsedRegisters;
+ VecUsedRegisters = vecUsedRegisters;
+ SpillRegionSize = spillRegionSize;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
new file mode 100644
index 00000000..587b1a02
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs
@@ -0,0 +1,259 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class CopyResolver
+ {
+ private class ParallelCopy
+ {
+ private readonly struct Copy
+ {
+ public Register Dest { get; }
+ public Register Source { get; }
+
+ public OperandType Type { get; }
+
+ public Copy(Register dest, Register source, OperandType type)
+ {
+ Dest = dest;
+ Source = source;
+ Type = type;
+ }
+ }
+
+ private readonly List<Copy> _copies;
+
+ public int Count => _copies.Count;
+
+ public ParallelCopy()
+ {
+ _copies = new List<Copy>();
+ }
+
+ public void AddCopy(Register dest, Register source, OperandType type)
+ {
+ _copies.Add(new Copy(dest, source, type));
+ }
+
+ public void Sequence(List<Operation> sequence)
+ {
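+ // Sequences this parallel copy into serial copies and swaps. Illustrative:
+ // {x0 <- x1, x1 <- x2} becomes "copy x0, x1" then "copy x1, x2", while the
+ // cycle {x0 <- x1, x1 <- x0} is broken with a register swap further below.
+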
+ Dictionary<Register, Register> locations = new Dictionary<Register, Register>();
+ Dictionary<Register, Register> sources = new Dictionary<Register, Register>();
+
+ Dictionary<Register, OperandType> types = new Dictionary<Register, OperandType>();
+
+ Queue<Register> pendingQueue = new Queue<Register>();
+ Queue<Register> readyQueue = new Queue<Register>();
+
+ foreach (Copy copy in _copies)
+ {
+ locations[copy.Source] = copy.Source;
+ sources[copy.Dest] = copy.Source;
+ types[copy.Dest] = copy.Type;
+
+ pendingQueue.Enqueue(copy.Dest);
+ }
+
+ foreach (Copy copy in _copies)
+ {
+ // If the destination is not the source of any other copy, we can assign it immediately.
+ if (!locations.ContainsKey(copy.Dest))
+ {
+ readyQueue.Enqueue(copy.Dest);
+ }
+ }
+
+ while (pendingQueue.TryDequeue(out Register current))
+ {
+ Register copyDest;
+ Register origSource;
+ Register copySource;
+
+ while (readyQueue.TryDequeue(out copyDest))
+ {
+ origSource = sources[copyDest];
+ copySource = locations[origSource];
+
+ OperandType type = types[copyDest];
+
+ EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+ locations[origSource] = copyDest;
+
+ if (origSource == copySource && sources.ContainsKey(origSource))
+ {
+ readyQueue.Enqueue(origSource);
+ }
+ }
+
+ copyDest = current;
+ origSource = sources[copyDest];
+ copySource = locations[origSource];
+
+ if (copyDest != copySource)
+ {
+ OperandType type = types[copyDest];
+
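+ // Use the full register width for the swap (presumably so the entire
+ // register contents are exchanged, regardless of the tracked operand type).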
+ type = type.IsInteger() ? OperandType.I64 : OperandType.V128;
+
+ EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type));
+
+ locations[origSource] = copyDest;
+
+ Register swapOther = copySource;
+
+ if (copyDest != locations[sources[copySource]])
+ {
+ // Find the other swap destination register.
+ // To do that, we search all the pending registers, and pick
+ // the one where the copy source register is equal to the
+ // current destination register being processed (copyDest).
+ foreach (Register pending in pendingQueue)
+ {
+ // Is this a copy of pending <- copyDest?
+ if (copyDest == locations[sources[pending]])
+ {
+ swapOther = pending;
+
+ break;
+ }
+ }
+ }
+
+ // The value that was previously in "copyDest" now lives in
+ // "copySource" thanks to the swap, so now we need to update the
+ // location for the next copy that is supposed to copy the value
+ // that used to live in "copyDest".
+ locations[sources[swapOther]] = copySource;
+ }
+ }
+ }
+
+ private static void EmitCopy(List<Operation> sequence, Operand x, Operand y)
+ {
+ sequence.Add(Operation(Instruction.Copy, x, y));
+ }
+
+ private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y)
+ {
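+ // Classic XOR-swap (x ^= y; y ^= x; x ^= y): exchanges two registers without
+ // a scratch register, which matters here since this runs after register
+ // allocation, when no free temporary is guaranteed to exist.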
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, y, y, x));
+ sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y));
+ }
+ }
+
+ private Queue<Operation> _fillQueue = null;
+ private Queue<Operation> _spillQueue = null;
+ private ParallelCopy _parallelCopy = null;
+
+ public bool HasCopy { get; private set; }
+
+ public void AddSplit(LiveInterval left, LiveInterval right)
+ {
+ if (left.Local != right.Local)
+ {
+ throw new ArgumentException("Intervals of different variables are not allowed.");
+ }
+
+ OperandType type = left.Local.Type;
+
+ if (left.IsSpilled && !right.IsSpilled)
+ {
+ // Move from the stack to a register.
+ AddSplitFill(left, right, type);
+ }
+ else if (!left.IsSpilled && right.IsSpilled)
+ {
+ // Move from a register to the stack.
+ AddSplitSpill(left, right, type);
+ }
+ else if (!left.IsSpilled && !right.IsSpilled && left.Register != right.Register)
+ {
+ // Move from one register to another.
+ AddSplitCopy(left, right, type);
+ }
+ else if (left.SpillOffset != right.SpillOffset)
+ {
+ // This would be a stack-to-stack move, which is not supported.
+ throw new ArgumentException("Both intervals were spilled.");
+ }
+ }
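+
+ // For example, resolving a split whose left child got a register and whose
+ // right child was spilled adds a single Spill store; register-to-register
+ // moves are instead batched into the ParallelCopy, so overlapping
+ // assignments (swaps and cycles) are sequenced safely.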
+
+ private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ if (_fillQueue == null)
+ {
+ _fillQueue = new Queue<Operation>();
+ }
+
+ Operand register = GetRegister(right.Register, type);
+ Operand offset = Const(left.SpillOffset);
+
+ _fillQueue.Enqueue(Operation(Instruction.Fill, register, offset));
+
+ HasCopy = true;
+ }
+
+ private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ if (_spillQueue == null)
+ {
+ _spillQueue = new Queue<Operation>();
+ }
+
+ Operand offset = Const(right.SpillOffset);
+ Operand register = GetRegister(left.Register, type);
+
+ _spillQueue.Enqueue(Operation(Instruction.Spill, default, offset, register));
+
+ HasCopy = true;
+ }
+
+ private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type)
+ {
+ if (_parallelCopy == null)
+ {
+ _parallelCopy = new ParallelCopy();
+ }
+
+ _parallelCopy.AddCopy(right.Register, left.Register, type);
+
+ HasCopy = true;
+ }
+
+ public Operation[] Sequence()
+ {
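+ // The emission order matters: spills run first, while their source registers
+ // still hold the values being saved, and fills run last, so the registers
+ // they load into are not needed as sources of the parallel copies in between.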
+ List<Operation> sequence = new List<Operation>();
+
+ if (_spillQueue != null)
+ {
+ while (_spillQueue.TryDequeue(out Operation spillOp))
+ {
+ sequence.Add(spillOp);
+ }
+ }
+
+ _parallelCopy?.Sequence(sequence);
+
+ if (_fillQueue != null)
+ {
+ while (_fillQueue.TryDequeue(out Operation fillOp))
+ {
+ sequence.Add(fillOp);
+ }
+ }
+
+ return sequence.ToArray();
+ }
+
+ private static Operand GetRegister(Register reg, OperandType type)
+ {
+ return Register(reg.Index, reg.Type, type);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
new file mode 100644
index 00000000..25952c77
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs
@@ -0,0 +1,454 @@
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class HybridAllocator : IRegisterAllocator
+ {
+ private readonly struct BlockInfo
+ {
+ public bool HasCall { get; }
+
+ public int IntFixedRegisters { get; }
+ public int VecFixedRegisters { get; }
+
+ public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters)
+ {
+ HasCall = hasCall;
+ IntFixedRegisters = intFixedRegisters;
+ VecFixedRegisters = vecFixedRegisters;
+ }
+ }
+
+ private struct LocalInfo
+ {
+ public int Uses { get; set; }
+ public int UsesAllocated { get; set; }
+ public int Sequence { get; set; }
+ public Operand Temp { get; set; }
+ public Operand Register { get; set; }
+ public Operand SpillOffset { get; set; }
+ public OperandType Type { get; }
+
+ private int _first;
+ private int _last;
+
+ public bool IsBlockLocal => _first == _last;
+
+ public LocalInfo(OperandType type, int uses, int blkIndex)
+ {
+ Uses = uses;
+ Type = type;
+
+ UsesAllocated = 0;
+ Sequence = 0;
+ Temp = default;
+ Register = default;
+ SpillOffset = default;
+
+ _first = -1;
+ _last = -1;
+
+ SetBlockIndex(blkIndex);
+ }
+
+ public void SetBlockIndex(int blkIndex)
+ {
+ if (_first == -1 || blkIndex < _first)
+ {
+ _first = blkIndex;
+ }
+
+ if (_last == -1 || blkIndex > _last)
+ {
+ _last = blkIndex;
+ }
+ }
+ }
+
+ private const int MaxIROperands = 4;
+ // The "visited" state is stored in the MSB of the local's value.
+ private const ulong VisitedMask = 1ul << 63;
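+ // Example (illustrative): after dest.NumberLocal(3) and SetVisited(dest), the
+ // local's value is 0x8000000000000003; GetLocalInfo then truncates the flag
+ // away with the (uint) cast and indexes _localInfo[3 - 1].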
+
+ private BlockInfo[] _blockInfo;
+ private LocalInfo[] _localInfo;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static bool IsVisited(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+
+ return (local.GetValueUnsafe() & VisitedMask) != 0;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void SetVisited(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+
+ local.GetValueUnsafe() |= VisitedMask;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private ref LocalInfo GetLocalInfo(Operand local)
+ {
+ Debug.Assert(local.Kind == OperandKind.LocalVariable);
+ Debug.Assert(IsVisited(local), "Local variable not visited. Used before defined?");
+
+ return ref _localInfo[(uint)local.GetValueUnsafe() - 1];
+ }
+
+ public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks)
+ {
+ int intUsedRegisters = 0;
+ int vecUsedRegisters = 0;
+
+ int intFreeRegisters = regMasks.IntAvailableRegisters;
+ int vecFreeRegisters = regMasks.VecAvailableRegisters;
+
+ _blockInfo = new BlockInfo[cfg.Blocks.Count];
+ _localInfo = new LocalInfo[cfg.Blocks.Count * 3];
+
+ int localInfoCount = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ int intFixedRegisters = 0;
+ int vecFixedRegisters = 0;
+
+ bool hasCall = false;
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ if (node.Instruction == Instruction.Call)
+ {
+ hasCall = true;
+ }
+
+ foreach (Operand source in node.SourcesUnsafe)
+ {
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ GetLocalInfo(source).SetBlockIndex(block.Index);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ GetLocalInfo(memOp.BaseAddress).SetBlockIndex(block.Index);
+ }
+
+ if (memOp.Index != default)
+ {
+ GetLocalInfo(memOp.Index).SetBlockIndex(block.Index);
+ }
+ }
+ }
+
+ foreach (Operand dest in node.DestinationsUnsafe)
+ {
+ if (dest.Kind == OperandKind.LocalVariable)
+ {
+ if (IsVisited(dest))
+ {
+ GetLocalInfo(dest).SetBlockIndex(block.Index);
+ }
+ else
+ {
+ dest.NumberLocal(++localInfoCount);
+
+ if (localInfoCount > _localInfo.Length)
+ {
+ Array.Resize(ref _localInfo, localInfoCount * 2);
+ }
+
+ SetVisited(dest);
+ GetLocalInfo(dest) = new LocalInfo(dest.Type, UsesCount(dest), block.Index);
+ }
+ }
+ else if (dest.Kind == OperandKind.Register)
+ {
+ if (dest.Type.IsInteger())
+ {
+ intFixedRegisters |= 1 << dest.GetRegister().Index;
+ }
+ else
+ {
+ vecFixedRegisters |= 1 << dest.GetRegister().Index;
+ }
+ }
+ }
+ }
+
+ _blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters);
+ }
+
+ int sequence = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ ref BlockInfo blkInfo = ref _blockInfo[block.Index];
+
+ int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters;
+ int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters;
+
+ int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0;
+ int vecCallerSavedRegisters = blkInfo.HasCall ? regMasks.VecCallerSavedRegisters : 0;
+
+ int intSpillTempRegisters = SelectSpillTemps(
+ intCallerSavedRegisters & ~blkInfo.IntFixedRegisters,
+ intLocalFreeRegisters);
+ int vecSpillTempRegisters = SelectSpillTemps(
+ vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters,
+ vecLocalFreeRegisters);
+
+ intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters);
+ vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ int intLocalUse = 0;
+ int vecLocalUse = 0;
+
+ Operand AllocateRegister(Operand local)
+ {
+ ref LocalInfo info = ref GetLocalInfo(local);
+
+ info.UsesAllocated++;
+
+ Debug.Assert(info.UsesAllocated <= info.Uses);
+
+ if (info.Register != default)
+ {
+ if (info.UsesAllocated == info.Uses)
+ {
+ Register reg = info.Register.GetRegister();
+
+ if (local.Type.IsInteger())
+ {
+ intLocalFreeRegisters |= 1 << reg.Index;
+ }
+ else
+ {
+ vecLocalFreeRegisters |= 1 << reg.Index;
+ }
+ }
+
+ return info.Register;
+ }
+ else
+ {
+ Operand temp = info.Temp;
+
+ if (temp == default || info.Sequence != sequence)
+ {
+ temp = local.Type.IsInteger()
+ ? GetSpillTemp(local, intSpillTempRegisters, ref intLocalUse)
+ : GetSpillTemp(local, vecSpillTempRegisters, ref vecLocalUse);
+
+ info.Sequence = sequence;
+ info.Temp = temp;
+ }
+
+ Operation fillOp = Operation(Instruction.Fill, temp, info.SpillOffset);
+
+ block.Operations.AddBefore(node, fillOp);
+
+ return temp;
+ }
+ }
+
+ bool folded = false;
+
+ // If the operation is a copy of a local and that local lives on the stack,
+ // we turn the copy itself into a fill instead of inserting a fill before it.
+ if (node.Instruction == Instruction.Copy)
+ {
+ Operand source = node.GetSource(0);
+
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ ref LocalInfo info = ref GetLocalInfo(source);
+
+ if (info.Register == default)
+ {
+ Operation fillOp = Operation(Instruction.Fill, node.Destination, info.SpillOffset);
+
+ block.Operations.AddBefore(node, fillOp);
+ block.Operations.Remove(node);
+
+ node = fillOp;
+
+ folded = true;
+ }
+ }
+ }
+
+ if (!folded)
+ {
+ foreach (ref Operand source in node.SourcesUnsafe)
+ {
+ if (source.Kind == OperandKind.LocalVariable)
+ {
+ source = AllocateRegister(source);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ memOp.BaseAddress = AllocateRegister(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ memOp.Index = AllocateRegister(memOp.Index);
+ }
+ }
+ }
+ }
+
+ int intLocalAsg = 0;
+ int vecLocalAsg = 0;
+
+ foreach (ref Operand dest in node.DestinationsUnsafe)
+ {
+ if (dest.Kind != OperandKind.LocalVariable)
+ {
+ continue;
+ }
+
+ ref LocalInfo info = ref GetLocalInfo(dest);
+
+ if (info.UsesAllocated == 0)
+ {
+ int mask = dest.Type.IsInteger()
+ ? intLocalFreeRegisters
+ : vecLocalFreeRegisters;
+
+ if (info.IsBlockLocal && mask != 0)
+ {
+ int selectedReg = BitOperations.TrailingZeroCount(mask);
+
+ info.Register = Register(selectedReg, info.Type.ToRegisterType(), info.Type);
+
+ if (dest.Type.IsInteger())
+ {
+ intLocalFreeRegisters &= ~(1 << selectedReg);
+ intUsedRegisters |= 1 << selectedReg;
+ }
+ else
+ {
+ vecLocalFreeRegisters &= ~(1 << selectedReg);
+ vecUsedRegisters |= 1 << selectedReg;
+ }
+ }
+ else
+ {
+ info.Register = default;
+ info.SpillOffset = Const(stackAlloc.Allocate(dest.Type.GetSizeInBytes()));
+ }
+ }
+
+ info.UsesAllocated++;
+
+ Debug.Assert(info.UsesAllocated <= info.Uses);
+
+ if (info.Register != default)
+ {
+ dest = info.Register;
+ }
+ else
+ {
+ Operand temp = info.Temp;
+
+ if (temp == default || info.Sequence != sequence)
+ {
+ temp = dest.Type.IsInteger()
+ ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg)
+ : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg);
+
+ info.Sequence = sequence;
+ info.Temp = temp;
+ }
+
+ dest = temp;
+
+ Operation spillOp = Operation(Instruction.Spill, default, info.SpillOffset, temp);
+
+ block.Operations.AddAfter(node, spillOp);
+
+ node = spillOp;
+ }
+ }
+
+ sequence++;
+
+ intUsedRegisters |= intLocalAsg | intLocalUse;
+ vecUsedRegisters |= vecLocalAsg | vecLocalUse;
+ }
+ }
+
+ return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize);
+ }
+
+ private static int SelectSpillTemps(int mask0, int mask1)
+ {
+ int selection = 0;
+ int count = 0;
+
+ while (count < MaxIROperands && mask0 != 0)
+ {
+ int mask = mask0 & -mask0;
+
+ selection |= mask;
+
+ mask0 &= ~mask;
+
+ count++;
+ }
+
+ while (count < MaxIROperands && mask1 != 0)
+ {
+ int mask = mask1 & -mask1;
+
+ selection |= mask;
+
+ mask1 &= ~mask;
+
+ count++;
+ }
+
+ Debug.Assert(count == MaxIROperands, "Not enough registers for spill temps.");
+
+ return selection;
+ }
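+
+ // Worked example with hypothetical masks: mask0 = 0b0110 and mask1 = 0b11001
+ // gives selection = 0b1111; bits 1 and 2 are taken from mask0 first, then
+ // bits 0 and 3 from mask1, stopping once MaxIROperands (4) temps are chosen.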
+
+ private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask)
+ {
+ int selectedReg = BitOperations.TrailingZeroCount(freeMask & ~useMask);
+
+ useMask |= 1 << selectedReg;
+
+ return Register(selectedReg, local.Type.ToRegisterType(), local.Type);
+ }
+
+ private static int UsesCount(Operand local)
+ {
+ return local.AssignmentsCount + local.UsesCount;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
new file mode 100644
index 00000000..8f236c25
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs
@@ -0,0 +1,12 @@
+using ARMeilleure.Translation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ interface IRegisterAllocator
+ {
+ AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks);
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
new file mode 100644
index 00000000..d80157af
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs
@@ -0,0 +1,1101 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ // Based on:
+ // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler".
+ // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf
+ class LinearScanAllocator : IRegisterAllocator
+ {
+ private const int InstructionGap = 2;
+ private const int InstructionGapMask = InstructionGap - 1;
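+ // Each instruction therefore owns two consecutive positions: the even one is
+ // used for its inputs and the odd one (pos + 1) for its outputs, which leaves
+ // room to insert copies between any two instructions.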
+
+ private HashSet<int> _blockEdges;
+ private LiveRange[] _blockRanges;
+ private BitMap[] _blockLiveIn;
+
+ private List<LiveInterval> _intervals;
+ private LiveInterval[] _parentIntervals;
+
+ private List<(IntrusiveList<Operation>, Operation)> _operationNodes;
+ private int _operationsCount;
+
+ private class AllocationContext
+ {
+ public RegisterMasks Masks { get; }
+
+ public StackAllocator StackAlloc { get; }
+
+ public BitMap Active { get; }
+ public BitMap Inactive { get; }
+
+ public int IntUsedRegisters { get; set; }
+ public int VecUsedRegisters { get; set; }
+
+ private readonly int[] _intFreePositions;
+ private readonly int[] _vecFreePositions;
+ private readonly int _intFreePositionsCount;
+ private readonly int _vecFreePositionsCount;
+
+ public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount)
+ {
+ StackAlloc = stackAlloc;
+ Masks = masks;
+
+ Active = new BitMap(Allocators.Default, intervalsCount);
+ Inactive = new BitMap(Allocators.Default, intervalsCount);
+
+ PopulateFreePositions(RegisterType.Integer, out _intFreePositions, out _intFreePositionsCount);
+ PopulateFreePositions(RegisterType.Vector, out _vecFreePositions, out _vecFreePositionsCount);
+
+ void PopulateFreePositions(RegisterType type, out int[] positions, out int count)
+ {
+ positions = new int[masks.RegistersCount];
+ count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type));
+
+ int mask = masks.GetAvailableRegisters(type);
+
+ for (int i = 0; i < positions.Length; i++)
+ {
+ if ((mask & (1 << i)) != 0)
+ {
+ positions[i] = int.MaxValue;
+ }
+ }
+ }
+ }
+
+ public void GetFreePositions(RegisterType type, in Span<int> positions, out int count)
+ {
+ if (type == RegisterType.Integer)
+ {
+ _intFreePositions.CopyTo(positions);
+
+ count = _intFreePositionsCount;
+ }
+ else
+ {
+ Debug.Assert(type == RegisterType.Vector);
+
+ _vecFreePositions.CopyTo(positions);
+
+ count = _vecFreePositionsCount;
+ }
+ }
+
+ public void MoveActiveToInactive(int bit)
+ {
+ Move(Active, Inactive, bit);
+ }
+
+ public void MoveInactiveToActive(int bit)
+ {
+ Move(Inactive, Active, bit);
+ }
+
+ private static void Move(BitMap source, BitMap dest, int bit)
+ {
+ source.Clear(bit);
+
+ dest.Set(bit);
+ }
+ }
+
+ public AllocationResult RunPass(
+ ControlFlowGraph cfg,
+ StackAllocator stackAlloc,
+ RegisterMasks regMasks)
+ {
+ NumberLocals(cfg, regMasks.RegistersCount);
+
+ var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count);
+
+ BuildIntervals(cfg, context);
+
+ for (int index = 0; index < _intervals.Count; index++)
+ {
+ LiveInterval current = _intervals[index];
+
+ if (current.IsEmpty)
+ {
+ continue;
+ }
+
+ if (current.IsFixed)
+ {
+ context.Active.Set(index);
+
+ if (current.IsFixedAndUsed)
+ {
+ if (current.Register.Type == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << current.Register.Index;
+ }
+ else /* if (current.Register.Type == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << current.Register.Index;
+ }
+ }
+
+ continue;
+ }
+
+ AllocateInterval(context, current, index, regMasks.RegistersCount);
+ }
+
+ for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++)
+ {
+ if (!_intervals[index].IsSpilled)
+ {
+ ReplaceLocalWithRegister(_intervals[index]);
+ }
+ }
+
+ InsertSplitCopies();
+ InsertSplitCopiesAtEdges(cfg);
+
+ return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize);
+ }
+
+ private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ // Check active intervals that already ended.
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ interval.Forward(current.GetStart());
+
+ if (interval.GetEnd() < current.GetStart())
+ {
+ context.Active.Clear(iIndex);
+ }
+ else if (!interval.Overlaps(current.GetStart()))
+ {
+ context.MoveActiveToInactive(iIndex);
+ }
+ }
+
+ // Check inactive intervals that already ended or were reactivated.
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ interval.Forward(current.GetStart());
+
+ if (interval.GetEnd() < current.GetStart())
+ {
+ context.Inactive.Clear(iIndex);
+ }
+ else if (interval.Overlaps(current.GetStart()))
+ {
+ context.MoveInactiveToActive(iIndex);
+ }
+ }
+
+ if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount))
+ {
+ AllocateRegWithSpill(context, current, cIndex, registersCount);
+ }
+ }
+
+ private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ Span<int> freePositions = stackalloc int[registersCount];
+
+ context.GetFreePositions(regType, freePositions, out int freePositionsCount);
+
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ freePositions[reg.Index] = 0;
+ freePositionsCount--;
+ }
+ }
+
+ // If all registers are already active, return early. No point in inspecting the inactive set to look for
+ // holes.
+ if (freePositionsCount == 0)
+ {
+ return false;
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ ref int freePosition = ref freePositions[reg.Index];
+
+ if (reg.Type == regType && freePosition != 0)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound && freePosition > overlapPosition)
+ {
+ freePosition = overlapPosition;
+ }
+ }
+ }
+
+ int selectedReg = GetHighestValueIndex(freePositions);
+ int selectedNextUse = freePositions[selectedReg];
+
+ // Intervals start and end at odd positions, unless they span an entire
+ // block, in which case they will have ranges at an even position.
+ // When an interval is loaded from the stack to a register, we can only
+ // do the split at an odd position, because otherwise the split interval
+ // that is inserted on the list to be processed may clobber a register
+ // used by the instruction at the same position as the split.
+ // The problem only happens when an interval ends exactly at this instruction;
+ // otherwise the intervals would interfere and the register wouldn't be selected.
+ // When the interval is aligned and the above happens, there's no problem, as
+ // the instruction with the last use is the one before that position.
+ selectedNextUse &= ~InstructionGapMask;
+
+ if (selectedNextUse <= current.GetStart())
+ {
+ return false;
+ }
+ else if (selectedNextUse < current.GetEnd())
+ {
+ LiveInterval splitChild = current.Split(selectedNextUse);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+ }
+
+ current.Register = new Register(selectedReg, regType);
+
+ if (regType == RegisterType.Integer)
+ {
+ context.IntUsedRegisters |= 1 << selectedReg;
+ }
+ else /* if (regType == RegisterType.Vector) */
+ {
+ context.VecUsedRegisters |= 1 << selectedReg;
+ }
+
+ context.Active.Set(cIndex);
+
+ return true;
+ }
+
+ private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount)
+ {
+ RegisterType regType = current.Local.Type.ToRegisterType();
+
+ Span<int> usePositions = stackalloc int[registersCount];
+ Span<int> blockedPositions = stackalloc int[registersCount];
+
+ context.GetFreePositions(regType, usePositions, out _);
+ context.GetFreePositions(regType, blockedPositions, out _);
+
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ ref int usePosition = ref usePositions[reg.Index];
+ ref int blockedPosition = ref blockedPositions[reg.Index];
+
+ if (interval.IsFixed)
+ {
+ usePosition = 0;
+ blockedPosition = 0;
+ }
+ else
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != LiveInterval.NotFound && usePosition > nextUse)
+ {
+ usePosition = nextUse;
+ }
+ }
+ }
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+ Register reg = interval.Register;
+
+ if (reg.Type == regType)
+ {
+ ref int usePosition = ref usePositions[reg.Index];
+ ref int blockedPosition = ref blockedPositions[reg.Index];
+
+ if (interval.IsFixed)
+ {
+ int overlapPosition = interval.GetOverlapPosition(current);
+
+ if (overlapPosition != LiveInterval.NotFound)
+ {
+ blockedPosition = Math.Min(blockedPosition, overlapPosition);
+ usePosition = Math.Min(usePosition, overlapPosition);
+ }
+ }
+ else if (interval.Overlaps(current))
+ {
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ if (nextUse != LiveInterval.NotFound && usePosition > nextUse)
+ {
+ usePosition = nextUse;
+ }
+ }
+ }
+ }
+
+ int selectedReg = GetHighestValueIndex(usePositions);
+ int currentFirstUse = current.FirstUse();
+
+ Debug.Assert(currentFirstUse >= 0, "Current interval has no uses.");
+
+ if (usePositions[selectedReg] < currentFirstUse)
+ {
+ // All intervals in the active and inactive sets are used before current,
+ // so spill the current interval.
+ Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill an interval currently being used.");
+
+ LiveInterval splitChild = current.Split(currentFirstUse);
+
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+
+ Spill(context, current);
+ }
+ else if (blockedPositions[selectedReg] > current.GetEnd())
+ {
+ // Spilling made the register available for the entire lifetime of current,
+ // so we only need to split the intervals using the selected register.
+ current.Register = new Register(selectedReg, regType);
+
+ SplitAndSpillOverlappingIntervals(context, current, registersCount);
+
+ context.Active.Set(cIndex);
+ }
+ else
+ {
+ // There are conflicts even after spill due to the use of fixed registers
+ // that can't be spilled, so we need to also split current at the point of
+ // the first fixed register use.
+ current.Register = new Register(selectedReg, regType);
+
+ int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask;
+
+ Debug.Assert(splitPosition > current.GetStart(), "Trying to split an interval at an invalid position.");
+
+ LiveInterval splitChild = current.Split(splitPosition);
+
+ if (splitChild.UsesCount != 0)
+ {
+ Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position.");
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+
+ SplitAndSpillOverlappingIntervals(context, current, registersCount);
+
+ context.Active.Set(cIndex);
+ }
+ }
+
+ private static int GetHighestValueIndex(Span<int> span)
+ {
+ int highest = int.MinValue;
+
+ int selected = 0;
+
+ for (int index = 0; index < span.Length; index++)
+ {
+ int current = span[index];
+
+ if (highest < current)
+ {
+ highest = current;
+ selected = index;
+
+ if (current == int.MaxValue)
+ {
+ break;
+ }
+ }
+ }
+
+ return selected;
+ }
+
+ private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount)
+ {
+ foreach (int iIndex in context.Active)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register == current.Register)
+ {
+ SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
+
+ context.Active.Clear(iIndex);
+ }
+ }
+
+ foreach (int iIndex in context.Inactive)
+ {
+ LiveInterval interval = _intervals[iIndex];
+
+ if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current))
+ {
+ SplitAndSpillOverlappingInterval(context, current, interval, registersCount);
+
+ context.Inactive.Clear(iIndex);
+ }
+ }
+ }
+
+ private void SplitAndSpillOverlappingInterval(
+ AllocationContext context,
+ LiveInterval current,
+ LiveInterval interval,
+ int registersCount)
+ {
+ // If there's a next use after the start of the current interval,
+ // we need to split the spilled interval twice, and re-insert it
+ // on the "pending" list to ensure that it will get a new register
+ // on that use position.
+ int nextUse = interval.NextUseAfter(current.GetStart());
+
+ LiveInterval splitChild;
+
+ if (interval.GetStart() < current.GetStart())
+ {
+ splitChild = interval.Split(current.GetStart());
+ }
+ else
+ {
+ splitChild = interval;
+ }
+
+ if (nextUse != LiveInterval.NotFound)
+ {
+ Debug.Assert(nextUse > current.GetStart(), "Trying to spill an interval currently being used.");
+
+ if (nextUse > splitChild.GetStart())
+ {
+ LiveInterval right = splitChild.Split(nextUse);
+
+ Spill(context, splitChild);
+
+ splitChild = right;
+ }
+
+ InsertInterval(splitChild, registersCount);
+ }
+ else
+ {
+ Spill(context, splitChild);
+ }
+ }
+
+ private void InsertInterval(LiveInterval interval, int registersCount)
+ {
+ Debug.Assert(interval.UsesCount != 0, "Trying to insert an interval without uses.");
+ Debug.Assert(!interval.IsEmpty, "Trying to insert an empty interval.");
+ Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval.");
+
+ int startIndex = registersCount * 2;
+
+ int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null);
+
+ if (insertIndex < 0)
+ {
+ insertIndex = ~insertIndex;
+ }
+
+ _intervals.Insert(insertIndex, interval);
+ }
+
+ private void Spill(AllocationContext context, LiveInterval interval)
+ {
+ Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval.");
+ Debug.Assert(interval.UsesCount == 0, "Trying to spill an interval with uses.");
+
+ // We first check if any of the siblings were spilled; if so, we can reuse
+ // the same stack offset. Otherwise, we allocate new space on the stack.
+ // This prevents a split interval from requiring stack-to-stack copies.
+ if (!interval.TrySpillWithSiblingOffset())
+ {
+ interval.Spill(context.StackAlloc.Allocate(interval.Local.Type));
+ }
+ }
+
+ private void InsertSplitCopies()
+ {
+ Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>();
+
+ CopyResolver GetCopyResolver(int position)
+ {
+ if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver))
+ {
+ copyResolver = new CopyResolver();
+
+ copyResolvers.Add(position, copyResolver);
+ }
+
+ return copyResolver;
+ }
+
+ foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit))
+ {
+ LiveInterval previous = interval;
+
+ foreach (LiveInterval splitChild in interval.SplitChildren())
+ {
+ int splitPosition = splitChild.GetStart();
+
+ if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition)
+ {
+ GetCopyResolver(splitPosition).AddSplit(previous, splitChild);
+ }
+
+ previous = splitChild;
+ }
+ }
+
+ foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers)
+ {
+ CopyResolver copyResolver = kv.Value;
+
+ if (!copyResolver.HasCopy)
+ {
+ continue;
+ }
+
+ int splitPosition = kv.Key;
+
+ (IntrusiveList<Operation> nodes, Operation node) = GetOperationNode(splitPosition);
+
+ Operation[] sequence = copyResolver.Sequence();
+
+ nodes.AddBefore(node, sequence[0]);
+
+ node = sequence[0];
+
+ for (int index = 1; index < sequence.Length; index++)
+ {
+ nodes.AddAfter(node, sequence[index]);
+
+ node = sequence[index];
+ }
+ }
+ }
+
+ private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg)
+ {
+ int blocksCount = cfg.Blocks.Count;
+
+ bool IsSplitEdgeBlock(BasicBlock block)
+ {
+ return block.Index >= blocksCount;
+ }
+
+ // Reset iterators to beginning because GetSplitChild depends on the state of the iterator.
+ foreach (LiveInterval interval in _intervals)
+ {
+ interval.Reset();
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ if (IsSplitEdgeBlock(block))
+ {
+ continue;
+ }
+
+ bool hasSingleOrNoSuccessor = block.SuccessorsCount <= 1;
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ BasicBlock successor = block.GetSuccessor(i);
+
+ int succIndex = successor.Index;
+
+ // If the successor is a split-edge block, the actual successor
+ // (the successor from before the edge was split) is right after it.
+ if (IsSplitEdgeBlock(successor))
+ {
+ succIndex = successor.GetSuccessor(0).Index;
+ }
+
+ CopyResolver copyResolver = null;
+
+ foreach (int iIndex in _blockLiveIn[succIndex])
+ {
+ LiveInterval interval = _parentIntervals[iIndex];
+
+ if (!interval.IsSplit)
+ {
+ continue;
+ }
+
+ int lEnd = _blockRanges[block.Index].End - 1;
+ int rStart = _blockRanges[succIndex].Start;
+
+ LiveInterval left = interval.GetSplitChild(lEnd);
+ LiveInterval right = interval.GetSplitChild(rStart);
+
+ if (left != default && right != default && left != right)
+ {
+ if (copyResolver == null)
+ {
+ copyResolver = new CopyResolver();
+ }
+
+ copyResolver.AddSplit(left, right);
+ }
+ }
+
+ if (copyResolver == null || !copyResolver.HasCopy)
+ {
+ continue;
+ }
+
+ Operation[] sequence = copyResolver.Sequence();
+
+ if (hasSingleOrNoSuccessor)
+ {
+ foreach (Operation operation in sequence)
+ {
+ block.Append(operation);
+ }
+ }
+ else if (successor.Predecessors.Count == 1)
+ {
+ successor.Operations.AddFirst(sequence[0]);
+
+ Operation prependNode = sequence[0];
+
+ for (int index = 1; index < sequence.Length; index++)
+ {
+ Operation operation = sequence[index];
+
+ successor.Operations.AddAfter(prependNode, operation);
+
+ prependNode = operation;
+ }
+ }
+ else
+ {
+ // Split the critical edge.
+ BasicBlock splitBlock = cfg.SplitEdge(block, successor);
+
+ foreach (Operation operation in sequence)
+ {
+ splitBlock.Append(operation);
+ }
+ }
+ }
+ }
+ }
+
+ private void ReplaceLocalWithRegister(LiveInterval current)
+ {
+ Operand register = GetRegister(current);
+
+ foreach (int usePosition in current.UsePositions())
+ {
+ (_, Operation operation) = GetOperationNode(usePosition);
+
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ Operand source = operation.GetSource(index);
+
+ if (source == current.Local)
+ {
+ operation.SetSource(index, register);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress == current.Local)
+ {
+ memOp.BaseAddress = register;
+ }
+
+ if (memOp.Index == current.Local)
+ {
+ memOp.Index = register;
+ }
+ }
+ }
+
+ for (int index = 0; index < operation.DestinationsCount; index++)
+ {
+ Operand dest = operation.GetDestination(index);
+
+ if (dest == current.Local)
+ {
+ operation.SetDestination(index, register);
+ }
+ }
+ }
+ }
+
+ private static Operand GetRegister(LiveInterval interval)
+ {
+ Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed.");
+
+ return Operand.Factory.Register(
+ interval.Register.Index,
+ interval.Register.Type,
+ interval.Local.Type);
+ }
+
+ private (IntrusiveList<Operation>, Operation) GetOperationNode(int position)
+ {
+ return _operationNodes[position / InstructionGap];
+ }
+
+ private void NumberLocals(ControlFlowGraph cfg, int registersCount)
+ {
+ _operationNodes = new List<(IntrusiveList<Operation>, Operation)>();
+ _intervals = new List<LiveInterval>();
+
+ for (int index = 0; index < registersCount; index++)
+ {
+ _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer)));
+ _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector)));
+ }
+
+ // The "visited" state is stored in the MSB of the local's value.
+ const ulong VisitedMask = 1ul << 63;
+
+ bool IsVisited(Operand local)
+ {
+ return (local.GetValueUnsafe() & VisitedMask) != 0;
+ }
+
+ void SetVisited(Operand local)
+ {
+ local.GetValueUnsafe() |= VisitedMask;
+ }
+
+ _operationsCount = 0;
+
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ _operationNodes.Add((block.Operations, node));
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ Operand dest = node.GetDestination(i);
+
+ if (dest.Kind == OperandKind.LocalVariable && !IsVisited(dest))
+ {
+ dest.NumberLocal(_intervals.Count);
+
+ _intervals.Add(new LiveInterval(dest));
+
+ SetVisited(dest);
+ }
+ }
+ }
+
+ _operationsCount += block.Operations.Count * InstructionGap;
+
+ if (block.Operations.Count == 0)
+ {
+ // Pretend we have a dummy instruction on the empty block.
+ _operationNodes.Add((default, default));
+
+ _operationsCount += InstructionGap;
+ }
+ }
+
+ _parentIntervals = _intervals.ToArray();
+ }
+
+ private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context)
+ {
+ _blockRanges = new LiveRange[cfg.Blocks.Count];
+
+ int mapSize = _intervals.Count;
+
+ BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count];
+ BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count];
+
+ // Compute local live sets.
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ BitMap liveGen = new BitMap(Allocators.Default, mapSize);
+ BitMap liveKill = new BitMap(Allocators.Default, mapSize);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ for (int i = 0; i < node.SourcesCount; i++)
+ {
+ VisitSource(node.GetSource(i));
+ }
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ VisitDestination(node.GetDestination(i));
+ }
+
+ void VisitSource(Operand source)
+ {
+ if (IsLocalOrRegister(source.Kind))
+ {
+ int id = GetOperandId(source);
+
+ if (!liveKill.IsSet(id))
+ {
+ liveGen.Set(id);
+ }
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ VisitSource(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ VisitSource(memOp.Index);
+ }
+ }
+ }
+
+ void VisitDestination(Operand dest)
+ {
+ liveKill.Set(GetOperandId(dest));
+ }
+ }
+
+ blkLiveGen [block.Index] = liveGen;
+ blkLiveKill[block.Index] = liveKill;
+ }
+
+ // Compute global live sets.
+ BitMap[] blkLiveIn = new BitMap[cfg.Blocks.Count];
+ BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count];
+
+ for (int index = 0; index < cfg.Blocks.Count; index++)
+ {
+ blkLiveIn [index] = new BitMap(Allocators.Default, mapSize);
+ blkLiveOut[index] = new BitMap(Allocators.Default, mapSize);
+ }
+
+ bool modified;
+
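+ // Standard backward liveness fixpoint:
+ //
+ // liveOut(b) = union of liveIn(s) over all successors s of b
+ // liveIn(b) = liveGen(b) union (liveOut(b) minus liveKill(b))
+ //
+ // iterated over the blocks in post order until no live set changes.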
+ do
+ {
+ modified = false;
+
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ BitMap liveOut = blkLiveOut[block.Index];
+
+ for (int i = 0; i < block.SuccessorsCount; i++)
+ {
+ BasicBlock succ = block.GetSuccessor(i);
+
+ modified |= liveOut.Set(blkLiveIn[succ.Index]);
+ }
+
+ BitMap liveIn = blkLiveIn[block.Index];
+
+ liveIn.Set (liveOut);
+ liveIn.Clear(blkLiveKill[block.Index]);
+ liveIn.Set (blkLiveGen [block.Index]);
+ }
+ }
+ while (modified);
+
+ _blockLiveIn = blkLiveIn;
+
+ _blockEdges = new HashSet<int>();
+
+ // Compute lifetime intervals.
+ int operationPos = _operationsCount;
+
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ // We handle empty blocks by pretending they have a dummy instruction,
+ // because otherwise the block would have the same start and end position,
+ // and this is not valid.
+ int instCount = Math.Max(block.Operations.Count, 1);
+
+ int blockStart = operationPos - instCount * InstructionGap;
+ int blockEnd = operationPos;
+
+ _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd);
+
+ _blockEdges.Add(blockStart);
+
+ BitMap liveOut = blkLiveOut[block.Index];
+
+ foreach (int id in liveOut)
+ {
+ _intervals[id].AddRange(blockStart, blockEnd);
+ }
+
+ if (block.Operations.Count == 0)
+ {
+ operationPos -= InstructionGap;
+
+ continue;
+ }
+
+ for (Operation node = block.Operations.Last; node != default; node = node.ListPrevious)
+ {
+ operationPos -= InstructionGap;
+
+ for (int i = 0; i < node.DestinationsCount; i++)
+ {
+ VisitDestination(node.GetDestination(i));
+ }
+
+ for (int i = 0; i < node.SourcesCount; i++)
+ {
+ VisitSource(node.GetSource(i));
+ }
+
+ if (node.Instruction == Instruction.Call)
+ {
+ AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer);
+ AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector);
+ }
+
+ void VisitSource(Operand source)
+ {
+ if (IsLocalOrRegister(source.Kind))
+ {
+ LiveInterval interval = _intervals[GetOperandId(source)];
+
+ interval.AddRange(blockStart, operationPos + 1);
+ interval.AddUsePosition(operationPos);
+ }
+ else if (source.Kind == OperandKind.Memory)
+ {
+ MemoryOperand memOp = source.GetMemory();
+
+ if (memOp.BaseAddress != default)
+ {
+ VisitSource(memOp.BaseAddress);
+ }
+
+ if (memOp.Index != default)
+ {
+ VisitSource(memOp.Index);
+ }
+ }
+ }
+
+ void VisitDestination(Operand dest)
+ {
+ LiveInterval interval = _intervals[GetOperandId(dest)];
+
+ if (interval.IsFixed)
+ {
+ interval.IsFixedAndUsed = true;
+ }
+
+ interval.SetStart(operationPos + 1);
+ interval.AddUsePosition(operationPos + 1);
+ }
+ }
+ }
+
+ foreach (LiveInterval interval in _parentIntervals)
+ {
+ interval.Reset();
+ }
+ }
+
+ private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType)
+ {
+ while (mask != 0)
+ {
+ int regIndex = BitOperations.TrailingZeroCount(mask);
+
+ Register callerSavedReg = new Register(regIndex, regType);
+
+ LiveInterval interval = _intervals[GetRegisterId(callerSavedReg)];
+
+ interval.AddRange(operationPos + 1, operationPos + InstructionGap);
+
+ mask &= ~(1 << regIndex);
+ }
+ }
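+
+ // For a call at position p, each caller-saved register gets the short fixed
+ // range [p + 1, p + 2), so any interval that is live across the call overlaps
+ // it and cannot stay in a caller-saved register without being split or
+ // spilled around the call.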
+
+ private static int GetOperandId(Operand operand)
+ {
+ if (operand.Kind == OperandKind.LocalVariable)
+ {
+ return operand.GetLocalNumber();
+ }
+ else if (operand.Kind == OperandKind.Register)
+ {
+ return GetRegisterId(operand.GetRegister());
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\".");
+ }
+ }
+
+ private static int GetRegisterId(Register register)
+ {
+ return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0);
+ }
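+
+ // The two register banks are interleaved: integer r5 maps to id 10 and vector
+ // v5 to id 11, matching the order in which the fixed intervals are created by
+ // NumberLocals, so ids below registersCount * 2 always denote fixed intervals.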
+
+ private static bool IsLocalOrRegister(OperandKind kind)
+ {
+ return kind == OperandKind.LocalVariable ||
+ kind == OperandKind.Register;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
new file mode 100644
index 00000000..d739ad28
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs
@@ -0,0 +1,396 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe readonly struct LiveInterval : IComparable<LiveInterval>
+ {
+ public const int NotFound = -1;
+
+ private struct Data
+ {
+ public int End;
+ public int SpillOffset;
+
+ public LiveRange FirstRange;
+ public LiveRange PrevRange;
+ public LiveRange CurrRange;
+
+ public LiveInterval Parent;
+
+ public UseList Uses;
+ public LiveIntervalList Children;
+
+ public Operand Local;
+ public Register Register;
+
+ public bool IsFixed;
+ public bool IsFixedAndUsed;
+ }
+
+ private readonly Data* _data;
+
+ private ref int End => ref _data->End;
+ private ref LiveRange FirstRange => ref _data->FirstRange;
+ private ref LiveRange CurrRange => ref _data->CurrRange;
+ private ref LiveRange PrevRange => ref _data->PrevRange;
+ private ref LiveInterval Parent => ref _data->Parent;
+ private ref UseList Uses => ref _data->Uses;
+ private ref LiveIntervalList Children => ref _data->Children;
+
+ public Operand Local => _data->Local;
+ public ref Register Register => ref _data->Register;
+ public ref int SpillOffset => ref _data->SpillOffset;
+
+ public bool IsFixed => _data->IsFixed;
+ public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed;
+ public bool IsEmpty => FirstRange == default;
+ public bool IsSplit => Children.Count != 0;
+ public bool IsSpilled => SpillOffset != -1;
+
+ public int UsesCount => Uses.Count;
+
+ public LiveInterval(Operand local = default, LiveInterval parent = default)
+ {
+ _data = Allocators.LiveIntervals.Allocate<Data>();
+ *_data = default;
+
+ _data->IsFixed = false;
+ _data->Local = local;
+
+ Parent = parent == default ? this : parent;
+ Uses = new UseList();
+ Children = new LiveIntervalList();
+
+ FirstRange = default;
+ CurrRange = default;
+ PrevRange = default;
+
+ SpillOffset = -1;
+ }
+
+ public LiveInterval(Register register) : this(local: default, parent: default)
+ {
+ _data->IsFixed = true;
+
+ Register = register;
+ }
+
+ public void Reset()
+ {
+ PrevRange = default;
+ CurrRange = FirstRange;
+ }
+
+ public void Forward(int position)
+ {
+ LiveRange prev = PrevRange;
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start < position && !curr.Overlaps(position))
+ {
+ prev = curr;
+ curr = curr.Next;
+ }
+
+ PrevRange = prev;
+ CurrRange = curr;
+ }
+
+ public int GetStart()
+ {
+ Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have a start position.");
+
+ return FirstRange.Start;
+ }
+
+ public void SetStart(int position)
+ {
+ if (FirstRange != default)
+ {
+ Debug.Assert(position != FirstRange.End);
+
+ FirstRange.Start = position;
+ }
+ else
+ {
+ FirstRange = new LiveRange(position, position + 1);
+ End = position + 1;
+ }
+ }
+
+ public int GetEnd()
+ {
+ Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have an end position.");
+
+ return End;
+ }
+
+ public void AddRange(int start, int end)
+ {
+ Debug.Assert(start < end, $"Invalid range [{start}, {end}).");
+
+ if (FirstRange != default)
+ {
+ // If the new range ends exactly where the first range starts, coalesce them together.
+ if (end == FirstRange.Start)
+ {
+ FirstRange.Start = start;
+
+ return;
+ }
+ // If the new range overlaps the first range, merge the two together.
+ else if (FirstRange.Overlaps(start, end))
+ {
+ FirstRange.Start = Math.Min(FirstRange.Start, start);
+ FirstRange.End = Math.Max(FirstRange.End, end);
+ End = Math.Max(End, end);
+
+ Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next));
+ return;
+ }
+ }
+
+ FirstRange = new LiveRange(start, end, FirstRange);
+ End = Math.Max(End, end);
+
+ Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next));
+ }
+
+ public void AddUsePosition(int position)
+ {
+ Uses.Add(position);
+ }
+
+ public bool Overlaps(int position)
+ {
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start <= position)
+ {
+ if (curr.Overlaps(position))
+ {
+ return true;
+ }
+
+ curr = curr.Next;
+ }
+
+ return false;
+ }
+
+ public bool Overlaps(LiveInterval other)
+ {
+ return GetOverlapPosition(other) != NotFound;
+ }
+
+ public int GetOverlapPosition(LiveInterval other)
+ {
+ LiveRange a = CurrRange;
+ LiveRange b = other.CurrRange;
+
+ while (a != default)
+ {
+ while (b != default && b.Start < a.Start)
+ {
+ if (a.Overlaps(b))
+ {
+ return a.Start;
+ }
+
+ b = b.Next;
+ }
+
+ if (b == default)
+ {
+ break;
+ }
+ else if (a.Overlaps(b))
+ {
+ return a.Start;
+ }
+
+ a = a.Next;
+ }
+
+ return NotFound;
+ }
+
+ public ReadOnlySpan<LiveInterval> SplitChildren()
+ {
+ return Parent.Children.Span;
+ }
+
+ public ReadOnlySpan<int> UsePositions()
+ {
+ return Uses.Span;
+ }
+
+ public int FirstUse()
+ {
+ return Uses.FirstUse;
+ }
+
+ public int NextUseAfter(int position)
+ {
+ return Uses.NextUse(position);
+ }
+
+ public LiveInterval Split(int position)
+ {
+ LiveInterval result = new(Local, Parent);
+ result.End = End;
+
+ LiveRange prev = PrevRange;
+ LiveRange curr = CurrRange;
+
+ while (curr != default && curr.Start < position && !curr.Overlaps(position))
+ {
+ prev = curr;
+ curr = curr.Next;
+ }
+
+ if (curr.Start >= position)
+ {
+ prev.Next = default;
+
+ result.FirstRange = curr;
+
+ End = prev.End;
+ }
+ else
+ {
+ result.FirstRange = new LiveRange(position, curr.End, curr.Next);
+
+ curr.End = position;
+ curr.Next = default;
+
+ End = curr.End;
+ }
+
+ result.Uses = Uses.Split(position);
+
+ AddSplitChild(result);
+
+ Debug.Assert(!IsEmpty, "Left interval is empty after split.");
+ Debug.Assert(!result.IsEmpty, "Right interval is empty after split.");
+
+ // Make sure the iterator in the new split is pointing to the start.
+ result.Reset();
+
+ return result;
+ }
+
+ private void AddSplitChild(LiveInterval child)
+ {
+ Debug.Assert(!child.IsEmpty, "Trying to insert an empty interval.");
+
+ Parent.Children.Add(child);
+ }
+
+ public LiveInterval GetSplitChild(int position)
+ {
+ if (Overlaps(position))
+ {
+ return this;
+ }
+
+ foreach (LiveInterval splitChild in SplitChildren())
+ {
+ if (splitChild.Overlaps(position))
+ {
+ return splitChild;
+ }
+ else if (splitChild.GetStart() > position)
+ {
+ break;
+ }
+ }
+
+ return default;
+ }
+
+ public bool TrySpillWithSiblingOffset()
+ {
+ foreach (LiveInterval splitChild in SplitChildren())
+ {
+ if (splitChild.IsSpilled)
+ {
+ Spill(splitChild.SpillOffset);
+
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ public void Spill(int offset)
+ {
+ SpillOffset = offset;
+ }
+
+ public int CompareTo(LiveInterval interval)
+ {
+ if (FirstRange == default || interval.FirstRange == default)
+ {
+ return 0;
+ }
+
+ return GetStart().CompareTo(interval.GetStart());
+ }
+
+ public bool Equals(LiveInterval interval)
+ {
+ return interval._data == _data;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is LiveInterval interval && Equals(interval);
+ }
+
+ public static bool operator ==(LiveInterval a, LiveInterval b)
+ {
+ return a.Equals(b);
+ }
+
+ public static bool operator !=(LiveInterval a, LiveInterval b)
+ {
+ return !a.Equals(b);
+ }
+
+ public override int GetHashCode()
+ {
+ return HashCode.Combine((IntPtr)_data);
+ }
+
+ public override string ToString()
+ {
+ LiveInterval self = this;
+
+ IEnumerable<string> GetRanges()
+ {
+ LiveRange curr = self.CurrRange;
+
+ while (curr != default)
+ {
+ if (curr == self.CurrRange)
+ {
+ yield return "*" + curr;
+ }
+ else
+ {
+ yield return curr.ToString();
+ }
+
+ curr = curr.Next;
+ }
+ }
+
+ return string.Join(", ", GetRanges());
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs
new file mode 100644
index 00000000..06b979ea
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs
@@ -0,0 +1,40 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe struct LiveIntervalList
+ {
+ private LiveInterval* _items;
+ private int _count;
+ private int _capacity;
+
+ public int Count => _count;
+ public Span<LiveInterval> Span => new(_items, _count);
+
+ public void Add(LiveInterval interval)
+ {
+ if (_count + 1 > _capacity)
+ {
+ var oldSpan = Span;
+
+ _capacity = Math.Max(4, _capacity * 2);
+ _items = Allocators.References.Allocate<LiveInterval>((uint)_capacity);
+
+ var newSpan = Span;
+
+ oldSpan.CopyTo(newSpan);
+ }
+
+ int position = interval.GetStart();
+ int i = _count - 1;
+
+ while (i >= 0 && _items[i].GetStart() > position)
+ {
+ _items[i + 1] = _items[i--];
+ }
+
+ _items[i + 1] = interval;
+ _count++;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
new file mode 100644
index 00000000..e38b5190
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs
@@ -0,0 +1,74 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe readonly struct LiveRange : IEquatable<LiveRange>
+ {
+ private struct Data
+ {
+ public int Start;
+ public int End;
+ public LiveRange Next;
+ }
+
+ private readonly Data* _data;
+
+ public ref int Start => ref _data->Start;
+ public ref int End => ref _data->End;
+ public ref LiveRange Next => ref _data->Next;
+
+ public LiveRange(int start, int end, LiveRange next = default)
+ {
+ _data = Allocators.LiveRanges.Allocate<Data>();
+
+ Start = start;
+ End = end;
+ Next = next;
+ }
+
+ public bool Overlaps(int start, int end)
+ {
+ return Start < end && start < End;
+ }
+
+ public bool Overlaps(LiveRange range)
+ {
+ return Start < range.End && range.Start < End;
+ }
+
+ public bool Overlaps(int position)
+ {
+ return position >= Start && position < End;
+ }
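+
+ // Ranges are half-open intervals [Start, End): [2, 4) and [4, 6) touch but do
+ // not overlap, and position 3 overlaps [2, 4) while position 4 does not.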
+
+ public bool Equals(LiveRange range)
+ {
+ return range._data == _data;
+ }
+
+ public override bool Equals(object obj)
+ {
+ return obj is LiveRange range && Equals(range);
+ }
+
+ public static bool operator ==(LiveRange a, LiveRange b)
+ {
+ return a.Equals(b);
+ }
+
+ public static bool operator !=(LiveRange a, LiveRange b)
+ {
+ return !a.Equals(b);
+ }
+
+ public override int GetHashCode()
+ {
+ return HashCode.Combine((IntPtr)_data);
+ }
+
+ public override string ToString()
+ {
+ return $"[{Start}, {End})";
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
new file mode 100644
index 00000000..bc948f95
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs
@@ -0,0 +1,50 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ readonly struct RegisterMasks
+ {
+ public int IntAvailableRegisters { get; }
+ public int VecAvailableRegisters { get; }
+ public int IntCallerSavedRegisters { get; }
+ public int VecCallerSavedRegisters { get; }
+ public int IntCalleeSavedRegisters { get; }
+ public int VecCalleeSavedRegisters { get; }
+ public int RegistersCount { get; }
+
+ public RegisterMasks(
+ int intAvailableRegisters,
+ int vecAvailableRegisters,
+ int intCallerSavedRegisters,
+ int vecCallerSavedRegisters,
+ int intCalleeSavedRegisters,
+ int vecCalleeSavedRegisters,
+ int registersCount)
+ {
+ IntAvailableRegisters = intAvailableRegisters;
+ VecAvailableRegisters = vecAvailableRegisters;
+ IntCallerSavedRegisters = intCallerSavedRegisters;
+ VecCallerSavedRegisters = vecCallerSavedRegisters;
+ IntCalleeSavedRegisters = intCalleeSavedRegisters;
+ VecCalleeSavedRegisters = vecCalleeSavedRegisters;
+ RegistersCount = registersCount;
+ }
+
+ public int GetAvailableRegisters(RegisterType type)
+ {
+ if (type == RegisterType.Integer)
+ {
+ return IntAvailableRegisters;
+ }
+ else if (type == RegisterType.Vector)
+ {
+ return VecAvailableRegisters;
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid register type \"{type}\".");
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
new file mode 100644
index 00000000..038312fe
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs
@@ -0,0 +1,25 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ class StackAllocator
+ {
+ private int _offset;
+
+ public int TotalSize => _offset;
+
+ public int Allocate(OperandType type)
+ {
+ return Allocate(type.GetSizeInBytes());
+ }
+
+ public int Allocate(int sizeInBytes)
+ {
+ int offset = _offset;
+
+ _offset += sizeInBytes;
+
+ return offset;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs
new file mode 100644
index 00000000..c89f0854
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs
@@ -0,0 +1,84 @@
+using System;
+
+namespace ARMeilleure.CodeGen.RegisterAllocators
+{
+ unsafe struct UseList
+ {
+ private int* _items;
+ private int _capacity;
+ private int _count;
+
+ public int Count => _count;
+ public int FirstUse => _count > 0 ? _items[_count - 1] : LiveInterval.NotFound;
+ public Span<int> Span => new(_items, _count);
+
+ public void Add(int position)
+ {
+ if (_count + 1 > _capacity)
+ {
+ var oldSpan = Span;
+
+ _capacity = Math.Max(4, _capacity * 2);
+ _items = Allocators.Default.Allocate<int>((uint)_capacity);
+
+ var newSpan = Span;
+
+ oldSpan.CopyTo(newSpan);
+ }
+
+ // Use positions are usually inserted in descending order, so storing the list
+ // in descending order makes insertion faster, since fewer elements have to be shifted.
+ int i = _count - 1;
+
+ while (i >= 0 && _items[i] < position)
+ {
+ _items[i + 1] = _items[i--];
+ }
+
+ _items[i + 1] = position;
+ _count++;
+ }
+
+ public int NextUse(int position)
+ {
+ int index = NextUseIndex(position);
+
+ return index != LiveInterval.NotFound ? _items[index] : LiveInterval.NotFound;
+ }
+
+ public int NextUseIndex(int position)
+ {
+ int i = _count - 1;
+
+ if (i == -1 || position > _items[0])
+ {
+ return LiveInterval.NotFound;
+ }
+
+ while (i >= 0 && _items[i] < position)
+ {
+ i--;
+ }
+
+ return i;
+ }
+
+ public UseList Split(int position)
+ {
+ int index = NextUseIndex(position);
+
+ // Since the list is in descending order, the new split list takes the front of the list and the current
+ // list takes the back of the list.
+ UseList result = new();
+ result._count = index + 1;
+ result._capacity = result._count;
+ result._items = _items;
+
+ _count = _count - result._count;
+ _capacity = _count;
+ _items = _items + result._count;
+
+ return result;
+ }
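+
+ // Example (hypothetical positions): with _items = { 9, 7, 5, 3 }, Split(6)
+ // returns a list holding { 9, 7 } (the uses at or after position 6), while
+ // this list keeps { 5, 3 }; both views share the same backing allocation.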
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
new file mode 100644
index 00000000..3d0bc21d
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs
@@ -0,0 +1,16 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ struct UnwindInfo
+ {
+ public const int Stride = 4; // Bytes.
+
+ public UnwindPushEntry[] PushEntries { get; }
+ public int PrologSize { get; }
+
+ public UnwindInfo(UnwindPushEntry[] pushEntries, int prologSize)
+ {
+ PushEntries = pushEntries;
+ PrologSize = prologSize;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs
new file mode 100644
index 00000000..4a8288a2
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs
@@ -0,0 +1,11 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ enum UnwindPseudoOp
+ {
+ PushReg = 0,
+ SetFrame = 1,
+ AllocStack = 2,
+ SaveReg = 3,
+ SaveXmm128 = 4
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
new file mode 100644
index 00000000..fd8ea402
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs
@@ -0,0 +1,20 @@
+namespace ARMeilleure.CodeGen.Unwinding
+{
+ struct UnwindPushEntry
+ {
+ public const int Stride = 16; // Bytes.
+
+ public UnwindPseudoOp PseudoOp { get; }
+ public int PrologOffset { get; }
+ public int RegIndex { get; }
+ public int StackOffsetOrAllocSize { get; }
+
+ public UnwindPushEntry(UnwindPseudoOp pseudoOp, int prologOffset, int regIndex = -1, int stackOffsetOrAllocSize = -1)
+ {
+ PseudoOp = pseudoOp;
+ PrologOffset = prologOffset;
+ RegIndex = regIndex;
+ StackOffsetOrAllocSize = stackOffsetOrAllocSize;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/Assembler.cs b/src/ARMeilleure/CodeGen/X86/Assembler.cs
new file mode 100644
index 00000000..67736a31
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/Assembler.cs
@@ -0,0 +1,1559 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Runtime.InteropServices;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ partial class Assembler
+ {
+ private const int ReservedBytesForJump = 1;
+
+ private const int OpModRMBits = 24;
+
+ private const byte RexPrefix = 0x40;
+ private const byte RexWPrefix = 0x48;
+ private const byte LockPrefix = 0xf0;
+
+ private const int MaxRegNumber = 15;
+
+ private struct Jump
+ {
+ public bool IsConditional { get; }
+ public X86Condition Condition { get; }
+ public Operand JumpLabel { get; }
+ public long? JumpTarget { get; set; }
+ public long JumpPosition { get; }
+ public long Offset { get; set; }
+ public int InstSize { get; set; }
+
+ public Jump(Operand jumpLabel, long jumpPosition)
+ {
+ IsConditional = false;
+ Condition = 0;
+ JumpLabel = jumpLabel;
+ JumpTarget = null;
+ JumpPosition = jumpPosition;
+
+ Offset = 0;
+ InstSize = 0;
+ }
+
+ public Jump(X86Condition condition, Operand jumpLabel, long jumpPosition)
+ {
+ IsConditional = true;
+ Condition = condition;
+ JumpLabel = jumpLabel;
+ JumpTarget = null;
+ JumpPosition = jumpPosition;
+
+ Offset = 0;
+ InstSize = 0;
+ }
+ }
+
+ private struct Reloc
+ {
+ public int JumpIndex { get; set; }
+ public int Position { get; set; }
+ public Symbol Symbol { get; set; }
+ }
+
+ private readonly List<Jump> _jumps;
+ private readonly List<Reloc> _relocs;
+ private readonly Dictionary<Operand, long> _labels;
+ private readonly Stream _stream;
+
+ public bool HasRelocs => _relocs != null;
+
+ public Assembler(Stream stream, bool relocatable)
+ {
+ _stream = stream;
+ _labels = new Dictionary<Operand, long>();
+ _jumps = new List<Jump>();
+
+ _relocs = relocatable ? new List<Reloc>() : null;
+ }
+
+ public void MarkLabel(Operand label)
+ {
+ _labels.Add(label, _stream.Position);
+ }
+
+ public void Add(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Add);
+ }
+
+ public void Addsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Addsd);
+ }
+
+ public void Addss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Addss);
+ }
+
+ public void And(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.And);
+ }
+
+ public void Bsr(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Bsr);
+ }
+
+ public void Bswap(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Bswap);
+ }
+
+ public void Call(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.None, X86Instruction.Call);
+ }
+
+ public void Cdq()
+ {
+ WriteByte(0x99);
+ }
+
+ public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Cmovcc];
+
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM | (int)condition, rrm: true);
+ }
+
+ public void Cmp(Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(src1, src2, type, X86Instruction.Cmp);
+ }
+
+ public void Cqo()
+ {
+ WriteByte(0x48);
+ WriteByte(0x99);
+ }
+
+ public void Cmpxchg(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
+ }
+
+ public void Cmpxchg16(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+ WriteByte(0x66);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg);
+ }
+
+ public void Cmpxchg16b(Operand memOp)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, default, OperandType.None, X86Instruction.Cmpxchg16b);
+ }
+
+ public void Cmpxchg8(Operand memOp, Operand src)
+ {
+ Debug.Assert(memOp.Kind == OperandKind.Memory);
+
+ WriteByte(LockPrefix);
+
+ WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg8);
+ }
+
+ public void Comisd(Operand src1, Operand src2)
+ {
+ WriteInstruction(src1, default, src2, X86Instruction.Comisd);
+ }
+
+ public void Comiss(Operand src1, Operand src2)
+ {
+ WriteInstruction(src1, default, src2, X86Instruction.Comiss);
+ }
+
+ public void Cvtsd2ss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss);
+ }
+
+ public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type);
+ }
+
+ public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type);
+ }
+
+ public void Cvtss2sd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd);
+ }
+
+ public void Div(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Div);
+ }
+
+ public void Divsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Divsd);
+ }
+
+ public void Divss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Divss);
+ }
+
+ public void Idiv(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Idiv);
+ }
+
+ public void Imul(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Imul128);
+ }
+
+ public void Imul(Operand dest, Operand source, OperandType type)
+ {
+ if (source.Kind != OperandKind.Register)
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+ }
+
+ WriteInstruction(dest, source, type, X86Instruction.Imul);
+ }
+
+ public void Imul(Operand dest, Operand src1, Operand src2, OperandType type)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Imul];
+
+ if (src2.Kind != OperandKind.Constant)
+ {
+ throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\".");
+ }
+
+ if (IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm8, rrm: true);
+
+ WriteByte(src2.AsByte());
+ }
+ else if (IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp)
+ {
+ WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm32, rrm: true);
+
+ WriteInt32(src2.AsInt32());
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}.");
+ }
+ }
+
+ public void Insertps(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Insertps);
+
+ WriteByte(imm);
+ }
+
+ public void Jcc(X86Condition condition, Operand dest)
+ {
+ if (dest.Kind == OperandKind.Label)
+ {
+ _jumps.Add(new Jump(condition, dest, _stream.Position));
+
+ // ReservedBytesForJump
+ WriteByte(0);
+ }
+ else
+ {
+ throw new ArgumentException("Destination operand must be of kind Label", nameof(dest));
+ }
+ }
+
+ public void Jcc(X86Condition condition, long offset)
+ {
+ if (ConstFitsOnS8(offset))
+ {
+ WriteByte((byte)(0x70 | (int)condition));
+
+ WriteByte((byte)offset);
+ }
+ else if (ConstFitsOnS32(offset))
+ {
+ WriteByte(0x0f);
+ WriteByte((byte)(0x80 | (int)condition));
+
+ WriteInt32((int)offset);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
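+
+ // Encoding note: the short form above is 0x70|cc followed by an 8-bit
+ // displacement (2 bytes total); the near form is 0x0F 0x80|cc followed by a
+ // 32-bit displacement (6 bytes total). GetJccLength relies on these sizes.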
+
+ public void Jmp(long offset)
+ {
+ if (ConstFitsOnS8(offset))
+ {
+ WriteByte(0xeb);
+
+ WriteByte((byte)offset);
+ }
+ else if (ConstFitsOnS32(offset))
+ {
+ WriteByte(0xe9);
+
+ WriteInt32((int)offset);
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
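+
+ // Encoding note: short JMP is 0xEB disp8 (2 bytes) and near JMP is 0xE9
+ // disp32 (5 bytes). GetJmpLength relies on these sizes.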
+
+ public void Jmp(Operand dest)
+ {
+ if (dest.Kind == OperandKind.Label)
+ {
+ _jumps.Add(new Jump(dest, _stream.Position));
+
+ // ReservedBytesForJump
+ WriteByte(0);
+ }
+ else
+ {
+ WriteInstruction(dest, default, OperandType.None, X86Instruction.Jmp);
+ }
+ }
+
+ public void Ldmxcsr(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.I32, X86Instruction.Ldmxcsr);
+ }
+
+ public void Lea(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Lea);
+ }
+
+ public void LockOr(Operand dest, Operand source, OperandType type)
+ {
+ WriteByte(LockPrefix);
+ WriteInstruction(dest, source, type, X86Instruction.Or);
+ }
+
+ public void Mov(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Mov);
+ }
+
+ public void Mov16(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16);
+ }
+
+ public void Mov8(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov8);
+ }
+
+ public void Movd(Operand dest, Operand source)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd];
+
+ if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRMR);
+ }
+ }
+
+ public void Movdqu(Operand dest, Operand source)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Movdqu);
+ }
+
+ public void Movhlps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movhlps);
+ }
+
+ public void Movlhps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movlhps);
+ }
+
+ public void Movq(Operand dest, Operand source)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd];
+
+ InstructionFlags flags = info.Flags | InstructionFlags.RexW;
+
+ if (source.Type.IsInteger() || source.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRRM, rrm: true);
+ }
+ else if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory)
+ {
+ WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRMR);
+ }
+ else
+ {
+ WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq);
+ }
+ }
+
+ public void Movsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movsd);
+ }
+
+ public void Movss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Movss);
+ }
+
+ public void Movsx16(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx16);
+ }
+
+ public void Movsx32(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx32);
+ }
+
+ public void Movsx8(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movsx8);
+ }
+
+ public void Movzx16(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movzx16);
+ }
+
+ public void Movzx8(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Movzx8);
+ }
+
+ public void Mul(Operand source)
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Mul128);
+ }
+
+ public void Mulsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Mulsd);
+ }
+
+ public void Mulss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Mulss);
+ }
+
+ public void Neg(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Neg);
+ }
+
+ public void Not(Operand dest)
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Not);
+ }
+
+ public void Or(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Or);
+ }
+
+ public void Pclmulqdq(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pclmulqdq);
+
+ WriteByte(imm);
+ }
+
+ public void Pcmpeqw(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw);
+ }
+
+ public void Pextrb(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrb);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrd(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrd);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrq(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrq);
+
+ WriteByte(imm);
+ }
+
+ public void Pextrw(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pextrw);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrb);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrd);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrq);
+
+ WriteByte(imm);
+ }
+
+ public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Pinsrw);
+
+ WriteByte(imm);
+ }
+
+ public void Pop(Operand dest)
+ {
+ if (dest.Kind == OperandKind.Register)
+ {
+ WriteCompactInst(dest, 0x58);
+ }
+ else
+ {
+ WriteInstruction(dest, default, dest.Type, X86Instruction.Pop);
+ }
+ }
+
+ public void Popcnt(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Popcnt);
+ }
+
+ public void Pshufd(Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, X86Instruction.Pshufd);
+
+ WriteByte(imm);
+ }
+
+ public void Push(Operand source)
+ {
+ if (source.Kind == OperandKind.Register)
+ {
+ WriteCompactInst(source, 0x50);
+ }
+ else
+ {
+ WriteInstruction(default, source, source.Type, X86Instruction.Push);
+ }
+ }
+
+ public void Return()
+ {
+ WriteByte(0xc3);
+ }
+
+ public void Ror(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Ror);
+ }
+
+ public void Sar(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Sar);
+ }
+
+ public void Shl(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Shl);
+ }
+
+ public void Shr(Operand dest, Operand source, OperandType type)
+ {
+ WriteShiftInst(dest, source, type, X86Instruction.Shr);
+ }
+
+ public void Setcc(Operand dest, X86Condition condition)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Setcc];
+
+ WriteOpCode(dest, default, default, OperandType.None, info.Flags, info.OpRRM | (int)condition);
+ }
+
+ public void Stmxcsr(Operand dest)
+ {
+ WriteInstruction(dest, default, OperandType.I32, X86Instruction.Stmxcsr);
+ }
+
+ public void Sub(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Sub);
+ }
+
+ public void Subsd(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Subsd);
+ }
+
+ public void Subss(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Subss);
+ }
+
+ public void Test(Operand src1, Operand src2, OperandType type)
+ {
+ WriteInstruction(src1, src2, type, X86Instruction.Test);
+ }
+
+ public void Xor(Operand dest, Operand source, OperandType type)
+ {
+ WriteInstruction(dest, source, type, X86Instruction.Xor);
+ }
+
+ public void Xorps(Operand dest, Operand src1, Operand src2)
+ {
+ WriteInstruction(dest, src1, src2, X86Instruction.Xorps);
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand source,
+ OperandType type = OperandType.None)
+ {
+ WriteInstruction(dest, default, source, inst, type);
+ }
+
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2)
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ WriteInstruction(src1, dest, src2, inst);
+ }
+ else
+ {
+ WriteInstruction(dest, src1, src2, inst);
+ }
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type)
+ {
+ WriteInstruction(dest, src1, src2, inst, type);
+ }
+
+ public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm)
+ {
+ WriteInstruction(dest, default, source, inst);
+
+ WriteByte(imm);
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ Operand src3)
+ {
+ // 3+ operands can only be encoded with the VEX encoding scheme.
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+ WriteInstruction(dest, src1, src2, inst);
+
+ WriteByte((byte)(src3.AsByte() << 4));
+ }
+
+ public void WriteInstruction(
+ X86Instruction inst,
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ byte imm)
+ {
+ WriteInstruction(dest, src1, src2, inst);
+
+ WriteByte(imm);
+ }
+
+ private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+ if (source.Kind == OperandKind.Register)
+ {
+ X86Register shiftReg = (X86Register)source.GetRegister().Index;
+
+ Debug.Assert(shiftReg == X86Register.Rcx, $"Invalid shift register \"{shiftReg}\".");
+
+ source = default;
+ }
+ else if (source.Kind == OperandKind.Constant)
+ {
+ source = Operand.Factory.Const((int)source.Value & (dest.Type == OperandType.I32 ? 0x1f : 0x3f));
+ }
+
+ WriteInstruction(dest, source, type, inst);
+ }
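+
+ // Constant shift amounts are masked to the operand width above, matching
+ // hardware semantics. For example, Shl with an I32 destination and a constant
+ // of 33 encodes a shift by 33 & 0x1f == 1.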
+
+ private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)inst];
+
+ if (source != default)
+ {
+ if (source.Kind == OperandKind.Constant)
+ {
+ ulong imm = source.Value;
+
+ if (inst == X86Instruction.Mov8)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else if (inst == X86Instruction.Mov16)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);
+
+ WriteInt16((short)imm);
+ }
+ else if (IsImm8(imm, type) && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else if (!source.Relocatable && IsImm32(imm, type) && info.OpRMImm32 != BadOp)
+ {
+ WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32);
+
+ WriteInt32((int)imm);
+ }
+ else if (dest != default && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp)
+ {
+ int rexPrefix = GetRexPrefix(dest, source, type, rrm: false);
+
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+
+ WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111)));
+
+ if (HasRelocs && source.Relocatable)
+ {
+ _relocs.Add(new Reloc
+ {
+ JumpIndex = _jumps.Count - 1,
+ Position = (int)_stream.Position,
+ Symbol = source.Symbol
+ });
+ }
+
+ WriteUInt64(imm);
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+ }
+ }
+ else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\".");
+ }
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else if (info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR);
+ }
+ else
+ {
+ throw new ArgumentNullException(nameof(source));
+ }
+ }
+
+ private void WriteInstruction(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ X86Instruction inst,
+ OperandType type = OperandType.None)
+ {
+ ref readonly InstructionInfo info = ref _instTable[(int)inst];
+
+ if (src2 != default)
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ ulong imm = src2.Value;
+
+ if ((byte)imm == imm && info.OpRMImm8 != BadOp)
+ {
+ WriteOpCode(dest, src1, default, type, info.Flags, info.OpRMImm8);
+
+ WriteByte((byte)imm);
+ }
+ else
+ {
+ throw new ArgumentException($"Failed to encode constant 0x{imm:X}.");
+ }
+ }
+ else if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\".");
+ }
+ }
+ else if (info.OpRRM != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true);
+ }
+ else if (info.OpRMR != BadOp)
+ {
+ WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR);
+ }
+ else
+ {
+ throw new ArgumentNullException(nameof(src2));
+ }
+ }
+
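+ // Worked example for WriteOpCode below (illustrative): "add rax, [rbp+0x10]"
+ // is emitted as REX.W (0x48), opcode 0x03, ModRM 0x45 (mod=01, reg=rax,
+ // rm=rbp) and the 8-bit displacement 0x10. No SIB byte is needed because the
+ // base register is not RSP and there is no index register.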
+ private void WriteOpCode(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type,
+ InstructionFlags flags,
+ int opCode,
+ bool rrm = false)
+ {
+ int rexPrefix = GetRexPrefix(dest, src2, type, rrm);
+
+ if ((flags & InstructionFlags.RexW) != 0)
+ {
+ rexPrefix |= RexWPrefix;
+ }
+
+ int modRM = (opCode >> OpModRMBits) << 3;
+
+ MemoryOperand memOp = default;
+ bool hasMemOp = false;
+
+ if (dest != default)
+ {
+ if (dest.Kind == OperandKind.Register)
+ {
+ int regIndex = dest.GetRegister().Index;
+
+ modRM |= (regIndex & 0b111) << (rrm ? 3 : 0);
+
+ if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (dest.Kind == OperandKind.Memory)
+ {
+ memOp = dest.GetMemory();
+ hasMemOp = true;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\".");
+ }
+ }
+
+ if (src2 != default)
+ {
+ if (src2.Kind == OperandKind.Register)
+ {
+ int regIndex = src2.GetRegister().Index;
+
+ modRM |= (regIndex & 0b111) << (rrm ? 0 : 3);
+
+ if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4)
+ {
+ rexPrefix |= RexPrefix;
+ }
+ }
+ else if (src2.Kind == OperandKind.Memory && !hasMemOp)
+ {
+ memOp = src2.GetMemory();
+ hasMemOp = true;
+ }
+ else
+ {
+ throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\".");
+ }
+ }
+
+ bool needsSibByte = false;
+ bool needsDisplacement = false;
+
+ int sib = 0;
+
+ if (hasMemOp)
+ {
+ // Either source or destination is a memory operand.
+ Register baseReg = memOp.BaseAddress.GetRegister();
+
+ X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111);
+
+ needsSibByte = memOp.Index != default || baseRegLow == X86Register.Rsp;
+ needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp;
+
+ if (needsDisplacement)
+ {
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ modRM |= 0x40;
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ modRM |= 0x80;
+ }
+ }
+
+ if (baseReg.Index >= 8)
+ {
+ Debug.Assert((uint)baseReg.Index <= MaxRegNumber);
+
+ rexPrefix |= RexPrefix | (baseReg.Index >> 3);
+ }
+
+ if (needsSibByte)
+ {
+ sib = (int)baseRegLow;
+
+ if (memOp.Index != default)
+ {
+ int indexReg = memOp.Index.GetRegister().Index;
+
+ Debug.Assert(indexReg != (int)X86Register.Rsp, "Using RSP as index register on the memory operand is not allowed.");
+
+ if (indexReg >= 8)
+ {
+ Debug.Assert((uint)indexReg <= MaxRegNumber);
+
+ rexPrefix |= RexPrefix | (indexReg >> 3) << 1;
+ }
+
+ sib |= (indexReg & 0b111) << 3;
+ }
+ else
+ {
+ sib |= 0b100 << 3;
+ }
+
+ sib |= (int)memOp.Scale << 6;
+
+ modRM |= 0b100;
+ }
+ else
+ {
+ modRM |= (int)baseRegLow;
+ }
+ }
+ else
+ {
+ // Source and destination are registers.
+ modRM |= 0xc0;
+ }
+
+ Debug.Assert(opCode != BadOp, "Invalid opcode value.");
+
+ if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding)
+ {
+ WriteEvexInst(dest, src1, src2, type, flags, opCode);
+
+ opCode &= 0xff;
+ }
+ else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding)
+ {
+ // In a VEX encoding, only one legacy prefix (0x66, 0xF2 or 0xF3) can be active
+ // at a time. The active prefix is encoded in the second byte using two bits.
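+ // For example, "vaddss xmm0, xmm1, xmm2" (opcode 0x0f58, prefix F3, W clear,
+ // all register indices below 8) takes the two-byte form below: C5 F2 58 C2.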
+
+ int vexByte2 = (flags & InstructionFlags.PrefixMask) switch
+ {
+ InstructionFlags.Prefix66 => 1,
+ InstructionFlags.PrefixF3 => 2,
+ InstructionFlags.PrefixF2 => 3,
+ _ => 0
+ };
+
+ if (src1 != default)
+ {
+ vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3;
+ }
+ else
+ {
+ vexByte2 |= 0b1111 << 3;
+ }
+
+ ushort opCodeHigh = (ushort)(opCode >> 8);
+
+ if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf)
+ {
+ // Two-byte form.
+ WriteByte(0xc5);
+
+ vexByte2 |= (~rexPrefix & 4) << 5;
+
+ WriteByte((byte)vexByte2);
+ }
+ else
+ {
+ // Three-byte form.
+ WriteByte(0xc4);
+
+ int vexByte1 = (~rexPrefix & 7) << 5;
+
+ switch (opCodeHigh)
+ {
+ case 0xf: vexByte1 |= 1; break;
+ case 0xf38: vexByte1 |= 2; break;
+ case 0xf3a: vexByte1 |= 3; break;
+
+ default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break;
+ }
+
+ vexByte2 |= (rexPrefix & 8) << 4;
+
+ WriteByte((byte)vexByte1);
+ WriteByte((byte)vexByte2);
+ }
+
+ opCode &= 0xff;
+ }
+ else
+ {
+ if (flags.HasFlag(InstructionFlags.Prefix66))
+ {
+ WriteByte(0x66);
+ }
+
+ if (flags.HasFlag(InstructionFlags.PrefixF2))
+ {
+ WriteByte(0xf2);
+ }
+
+ if (flags.HasFlag(InstructionFlags.PrefixF3))
+ {
+ WriteByte(0xf3);
+ }
+
+ if (rexPrefix != 0)
+ {
+ WriteByte((byte)rexPrefix);
+ }
+ }
+
+ if (dest != default && (flags & InstructionFlags.RegOnly) != 0)
+ {
+ opCode += dest.GetRegister().Index & 7;
+ }
+
+ if ((opCode & 0xff0000) != 0)
+ {
+ WriteByte((byte)(opCode >> 16));
+ }
+
+ if ((opCode & 0xff00) != 0)
+ {
+ WriteByte((byte)(opCode >> 8));
+ }
+
+ WriteByte((byte)opCode);
+
+ if ((flags & InstructionFlags.RegOnly) == 0)
+ {
+ WriteByte((byte)modRM);
+
+ if (needsSibByte)
+ {
+ WriteByte((byte)sib);
+ }
+
+ if (needsDisplacement)
+ {
+ if (ConstFitsOnS8(memOp.Displacement))
+ {
+ WriteByte((byte)memOp.Displacement);
+ }
+ else /* if (ConstFitsOnS32(memOp.Displacement)) */
+ {
+ WriteInt32(memOp.Displacement);
+ }
+ }
+ }
+ }
+
+ private void WriteEvexInst(
+ Operand dest,
+ Operand src1,
+ Operand src2,
+ OperandType type,
+ InstructionFlags flags,
+ int opCode,
+ bool broadcast = false,
+ int registerWidth = 128,
+ int maskRegisterIdx = 0,
+ bool zeroElements = false)
+ {
+ int op1Idx = dest.GetRegister().Index;
+ int op2Idx = src1.GetRegister().Index;
+ int op3Idx = src2.GetRegister().Index;
+
+ WriteByte(0x62);
+
+ // P0
+ // Extend operand 1 register (bit 3)
+ bool r = (op1Idx & 8) == 0;
+ // Extend operand 3 register (bit 4)
+ bool x = (op3Idx & 16) == 0;
+ // Extend operand 3 register (bit 3)
+ bool b = (op3Idx & 8) == 0;
+ // Extend operand 1 register (bit 4)
+ bool rp = (op1Idx & 16) == 0;
+ // Escape code index
+ byte mm = 0b00;
+
+ switch ((ushort)(opCode >> 8))
+ {
+ case 0xf: mm = 0b01; break;
+ case 0xf38: mm = 0b10; break;
+ case 0xf3a: mm = 0b11; break;
+
+ default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break;
+ }
+
+ WriteByte(
+ (byte)(
+ (r ? 0x80 : 0) |
+ (x ? 0x40 : 0) |
+ (b ? 0x20 : 0) |
+ (rp ? 0x10 : 0) |
+ mm));
+
+ // P1
+ // Specify 64-bit lane mode
+ bool w = Is64Bits(type);
+ // Operand 2 register index
+ byte vvvv = (byte)(~op2Idx & 0b1111);
+ // Opcode prefix
+ byte pp = (flags & InstructionFlags.PrefixMask) switch
+ {
+ InstructionFlags.Prefix66 => 0b01,
+ InstructionFlags.PrefixF3 => 0b10,
+ InstructionFlags.PrefixF2 => 0b11,
+ _ => 0
+ };
+ WriteByte(
+ (byte)(
+ (w ? 0x80 : 0) |
+ (vvvv << 3) |
+ 0b100 |
+ pp));
+
+ // P2
+ // Mask register determines what elements to zero, rather than what elements to merge
+ bool z = zeroElements;
+ // Specifies register-width
+ byte ll = 0b00;
+ switch (registerWidth)
+ {
+ case 128: ll = 0b00; break;
+ case 256: ll = 0b01; break;
+ case 512: ll = 0b10; break;
+
+ default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break;
+ }
+ // Embedded broadcast in the case of a memory operand
+ bool bcast = broadcast;
+ // Extend operand 2 register
+ bool vp = (op2Idx & 16) == 0;
+ // Mask register index
+ Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}.");
+ byte aaa = (byte)(maskRegisterIdx & 0b111);
+
+ WriteByte(
+ (byte)(
+ (z ? 0x80 : 0) |
+ (ll << 5) |
+ (bcast ? 0x10 : 0) |
+ (vp ? 8 : 0) |
+ aaa));
+ }
+
+ private void WriteCompactInst(Operand operand, int opCode)
+ {
+ int regIndex = operand.GetRegister().Index;
+
+ if (regIndex >= 8)
+ {
+ WriteByte(0x41);
+ }
+
+ WriteByte((byte)(opCode + (regIndex & 0b111)));
+ }
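+
+ // Illustrative: Push(r12) reaches this method and emits 0x41 0x54, i.e. the
+ // REX.B prefix followed by 0x50 + (12 & 0b111).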
+
+ private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm)
+ {
+ int rexPrefix = 0;
+
+ if (Is64Bits(type))
+ {
+ rexPrefix = RexWPrefix;
+ }
+
+ void SetRegisterHighBit(Register reg, int bit)
+ {
+ if (reg.Index >= 8)
+ {
+ rexPrefix |= RexPrefix | (reg.Index >> 3) << bit;
+ }
+ }
+
+ if (dest != default && dest.Kind == OperandKind.Register)
+ {
+ SetRegisterHighBit(dest.GetRegister(), rrm ? 2 : 0);
+ }
+
+ if (source != default && source.Kind == OperandKind.Register)
+ {
+ SetRegisterHighBit(source.GetRegister(), rrm ? 0 : 2);
+ }
+
+ return rexPrefix;
+ }
+
+ public (byte[], RelocInfo) GetCode()
+ {
+ var jumps = CollectionsMarshal.AsSpan(_jumps);
+ var relocs = CollectionsMarshal.AsSpan(_relocs);
+
+ // Write jump relative offsets.
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (int i = 0; i < jumps.Length; i++)
+ {
+ ref Jump jump = ref jumps[i];
+
+ // If jump target not resolved yet, resolve it.
+ if (jump.JumpTarget == null)
+ {
+ jump.JumpTarget = _labels[jump.JumpLabel];
+ }
+
+ long jumpTarget = jump.JumpTarget.Value;
+ long offset = jumpTarget - jump.JumpPosition;
+
+ if (offset < 0)
+ {
+ for (int j = i - 1; j >= 0; j--)
+ {
+ ref Jump jump2 = ref jumps[j];
+
+ if (jump2.JumpPosition < jumpTarget)
+ {
+ break;
+ }
+
+ offset -= jump2.InstSize - ReservedBytesForJump;
+ }
+ }
+ else
+ {
+ for (int j = i + 1; j < jumps.Length; j++)
+ {
+ ref Jump jump2 = ref jumps[j];
+
+ if (jump2.JumpPosition >= jumpTarget)
+ {
+ break;
+ }
+
+ offset += jump2.InstSize - ReservedBytesForJump;
+ }
+
+ offset -= ReservedBytesForJump;
+ }
+
+ if (jump.IsConditional)
+ {
+ jump.InstSize = GetJccLength(offset);
+ }
+ else
+ {
+ jump.InstSize = GetJmpLength(offset);
+ }
+
+ // The jump is relative to the next instruction, not the current one.
+ // Since we didn't know the next instruction address when calculating
+ // the offset (as the size of the current jump instruction was not known),
+ // we now need to compensate the offset with the jump instruction size.
+ // It's also worth noting that:
+ // - This is only needed for backward jumps.
+ // - GetJmpLength and GetJccLength also compensate the offset
+ // internally when computing the jump instruction size.
+ if (offset < 0)
+ {
+ offset -= jump.InstSize;
+ }
+
+ if (jump.Offset != offset)
+ {
+ jump.Offset = offset;
+
+ modified = true;
+ }
+ }
+ }
+ while (modified);
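+
+ // Changing one jump's size shifts every later jump and can alter other jumps'
+ // offsets, so the loop above repeats until all sizes reach a fixed point.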
+
+ // Write the code, ignoring the dummy bytes after jumps, into a new stream.
+ _stream.Seek(0, SeekOrigin.Begin);
+
+ using var codeStream = MemoryStreamManager.Shared.GetStream();
+ var assembler = new Assembler(codeStream, HasRelocs);
+
+ bool hasRelocs = HasRelocs;
+ int relocIndex = 0;
+ int relocOffset = 0;
+ var relocEntries = hasRelocs
+ ? new RelocEntry[relocs.Length]
+ : Array.Empty<RelocEntry>();
+
+ for (int i = 0; i < jumps.Length; i++)
+ {
+ ref Jump jump = ref jumps[i];
+
+ // If has relocations, calculate their new positions compensating for jumps.
+ if (hasRelocs)
+ {
+ relocOffset += jump.InstSize - ReservedBytesForJump;
+
+ for (; relocIndex < relocEntries.Length; relocIndex++)
+ {
+ ref Reloc reloc = ref relocs[relocIndex];
+
+ if (reloc.JumpIndex > i)
+ {
+ break;
+ }
+
+ relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
+ }
+ }
+
+ Span<byte> buffer = new byte[jump.JumpPosition - _stream.Position];
+
+ _stream.Read(buffer);
+ _stream.Seek(ReservedBytesForJump, SeekOrigin.Current);
+
+ codeStream.Write(buffer);
+
+ if (jump.IsConditional)
+ {
+ assembler.Jcc(jump.Condition, jump.Offset);
+ }
+ else
+ {
+ assembler.Jmp(jump.Offset);
+ }
+ }
+
+ // Write remaining relocations. This case happens when there are no jumps assembled.
+ for (; relocIndex < relocEntries.Length; relocIndex++)
+ {
+ ref Reloc reloc = ref relocs[relocIndex];
+
+ relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol);
+ }
+
+ _stream.CopyTo(codeStream);
+
+ var code = codeStream.ToArray();
+ var relocInfo = new RelocInfo(relocEntries);
+
+ return (code, relocInfo);
+ }
+
+ private static bool Is64Bits(OperandType type)
+ {
+ return type == OperandType.I64 || type == OperandType.FP64;
+ }
+
+ private static bool IsImm8(ulong immediate, OperandType type)
+ {
+ long value = type == OperandType.I32 ? (int)immediate : (long)immediate;
+
+ return ConstFitsOnS8(value);
+ }
+
+ private static bool IsImm32(ulong immediate, OperandType type)
+ {
+ long value = type == OperandType.I32 ? (int)immediate : (long)immediate;
+
+ return ConstFitsOnS32(value);
+ }
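+
+ // Illustrative: for I32 operands the immediate is reinterpreted as a signed
+ // 32-bit value first, so IsImm8(0xFFFFFFFF, OperandType.I32) returns true;
+ // the value is -1, which sign-extends from a single byte.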
+
+ private static int GetJccLength(long offset)
+ {
+ if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
+ {
+ return 2;
+ }
+ else if (ConstFitsOnS32(offset < 0 ? offset - 6 : offset))
+ {
+ return 6;
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
+
+ private static int GetJmpLength(long offset)
+ {
+ if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset))
+ {
+ return 2;
+ }
+ else if (ConstFitsOnS32(offset < 0 ? offset - 5 : offset))
+ {
+ return 5;
+ }
+ else
+ {
+ throw new ArgumentOutOfRangeException(nameof(offset));
+ }
+ }
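+
+ // For backward jumps the incoming offset does not yet include the jump
+ // instruction itself, so both helpers subtract the candidate instruction
+ // length before checking the range. For example, an offset of -100 selects
+ // the 2-byte form because -100 - 2 == -102 still fits in a signed byte.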
+
+ private static bool ConstFitsOnS8(long value)
+ {
+ return value == (sbyte)value;
+ }
+
+ private static bool ConstFitsOnS32(long value)
+ {
+ return value == (int)value;
+ }
+
+ private void WriteInt16(short value)
+ {
+ WriteUInt16((ushort)value);
+ }
+
+ private void WriteInt32(int value)
+ {
+ WriteUInt32((uint)value);
+ }
+
+ private void WriteByte(byte value)
+ {
+ _stream.WriteByte(value);
+ }
+
+ private void WriteUInt16(ushort value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ }
+
+ private void WriteUInt32(uint value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ }
+
+ private void WriteUInt64(ulong value)
+ {
+ _stream.WriteByte((byte)(value >> 0));
+ _stream.WriteByte((byte)(value >> 8));
+ _stream.WriteByte((byte)(value >> 16));
+ _stream.WriteByte((byte)(value >> 24));
+ _stream.WriteByte((byte)(value >> 32));
+ _stream.WriteByte((byte)(value >> 40));
+ _stream.WriteByte((byte)(value >> 48));
+ _stream.WriteByte((byte)(value >> 56));
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs
new file mode 100644
index 00000000..e6a2ff07
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs
@@ -0,0 +1,295 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ partial class Assembler
+ {
+ public static bool SupportsVexPrefix(X86Instruction inst)
+ {
+ return _instTable[(int)inst].Flags.HasFlag(InstructionFlags.Vex);
+ }
+
+ private const int BadOp = 0;
+
+ [Flags]
+ private enum InstructionFlags
+ {
+ None = 0,
+ RegOnly = 1 << 0,
+ Reg8Src = 1 << 1,
+ Reg8Dest = 1 << 2,
+ RexW = 1 << 3,
+ Vex = 1 << 4,
+ Evex = 1 << 5,
+
+ PrefixBit = 16,
+ PrefixMask = 7 << PrefixBit,
+ Prefix66 = 1 << PrefixBit,
+ PrefixF3 = 2 << PrefixBit,
+ PrefixF2 = 4 << PrefixBit
+ }
+
+ private readonly struct InstructionInfo
+ {
+ public int OpRMR { get; }
+ public int OpRMImm8 { get; }
+ public int OpRMImm32 { get; }
+ public int OpRImm64 { get; }
+ public int OpRRM { get; }
+
+ public InstructionFlags Flags { get; }
+
+ public InstructionInfo(
+ int opRMR,
+ int opRMImm8,
+ int opRMImm32,
+ int opRImm64,
+ int opRRM,
+ InstructionFlags flags)
+ {
+ OpRMR = opRMR;
+ OpRMImm8 = opRMImm8;
+ OpRMImm32 = opRMImm32;
+ OpRImm64 = opRImm64;
+ OpRRM = opRRM;
+ Flags = flags;
+ }
+ }
+
+ private static readonly InstructionInfo[] _instTable;
+
+ static Assembler()
+ {
+ _instTable = new InstructionInfo[(int)X86Instruction.Count];
+
+ // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags
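+ // Each opcode value packs the ModRM reg extension ("/digit") into the byte at
+ // OpModRMBits and the opcode bytes into the low 24 bits. For example, the And
+ // RM/I8 entry 0x04000083 encodes "83 /4 ib", the AND r/m, imm8 form.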
+ Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None));
+ Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex));
+ Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Aesdec, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesdeclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesenc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesenclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Aesimc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None));
+ Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex));
+ Add(X86Instruction.Andpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Andps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex));
+ Add(X86Instruction.Blendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3815, InstructionFlags.Prefix66));
+ Add(X86Instruction.Blendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3814, InstructionFlags.Prefix66));
+ Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None));
+ Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RegOnly));
+ Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None));
+ Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None));
+ Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex));
+ Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cmpxchg, new InstructionInfo(0x00000fb1, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW));
+ Add(X86Instruction.Cmpxchg8, new InstructionInfo(0x00000fb0, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex));
+ Add(X86Instruction.Crc32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2));
+ Add(X86Instruction.Crc32_16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66));
+ Add(X86Instruction.Crc32_8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src));
+ Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex));
+ Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Cvtss2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None));
+ Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex));
+ Add(X86Instruction.Divsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Gf2p8affineqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3ace, InstructionFlags.Prefix66));
+ Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstructionFlags.None));
+ Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None));
+ Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None));
+ Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Ldmxcsr, new InstructionInfo(0x02000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
+ Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None));
+ Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex));
+ Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex));
+ Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None));
+ Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66));
+ Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex));
+ Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex));
+ Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstructionFlags.None));
+ Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None));
+ Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None));
+ Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src));
+ Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None));
+ Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex));
+ Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None));
+ Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pand, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66));
+ Add(X86Instruction.Pclmulqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a44, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pinsrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3));
+ Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None));
+ Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex));
+ Add(X86Instruction.Rcpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex));
+ Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest));
+ Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None));
+ Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None));
+ Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cb, InstructionFlags.None));
+ Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex));
+ Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Stmxcsr, new InstructionInfo(0x03000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex));
+ Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None));
+ Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex));
+ Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2));
+ Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3));
+ Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None));
+ Add(X86Instruction.Unpckhpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex));
+ Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex));
+ Add(X86Instruction.Vblendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW));
+ Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None));
+ Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66));
+ Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex));
+
+ static void Add(X86Instruction inst, in InstructionInfo info)
+ {
+ _instTable[(int)inst] = info;
+ }
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CallConvName.cs b/src/ARMeilleure/CodeGen/X86/CallConvName.cs
new file mode 100644
index 00000000..be367628
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CallConvName.cs
@@ -0,0 +1,8 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum CallConvName
+ {
+ SystemV,
+ Windows
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/CallingConvention.cs b/src/ARMeilleure/CodeGen/X86/CallingConvention.cs
new file mode 100644
index 00000000..953fef5b
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CallingConvention.cs
@@ -0,0 +1,158 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CallingConvention
+ {
+ private const int RegistersMask = 0xffff;
+
+ public static int GetIntAvailableRegisters()
+ {
+ return RegistersMask & ~(1 << (int)X86Register.Rsp);
+ }
+
+ public static int GetVecAvailableRegisters()
+ {
+ return RegistersMask;
+ }
+
+ public static int GetIntCallerSavedRegisters()
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ return (1 << (int)X86Register.Rax) |
+ (1 << (int)X86Register.Rcx) |
+ (1 << (int)X86Register.Rdx) |
+ (1 << (int)X86Register.R8) |
+ (1 << (int)X86Register.R9) |
+ (1 << (int)X86Register.R10) |
+ (1 << (int)X86Register.R11);
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ return (1 << (int)X86Register.Rax) |
+ (1 << (int)X86Register.Rcx) |
+ (1 << (int)X86Register.Rdx) |
+ (1 << (int)X86Register.Rsi) |
+ (1 << (int)X86Register.Rdi) |
+ (1 << (int)X86Register.R8) |
+ (1 << (int)X86Register.R9) |
+ (1 << (int)X86Register.R10) |
+ (1 << (int)X86Register.R11);
+ }
+ }
+
+ public static int GetVecCallerSavedRegisters()
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ return (1 << (int)X86Register.Xmm0) |
+ (1 << (int)X86Register.Xmm1) |
+ (1 << (int)X86Register.Xmm2) |
+ (1 << (int)X86Register.Xmm3) |
+ (1 << (int)X86Register.Xmm4) |
+ (1 << (int)X86Register.Xmm5);
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ return RegistersMask;
+ }
+ }
+
+ public static int GetIntCalleeSavedRegisters()
+ {
+ return GetIntCallerSavedRegisters() ^ RegistersMask;
+ }
+
+ public static int GetVecCalleeSavedRegisters()
+ {
+ return GetVecCallerSavedRegisters() ^ RegistersMask;
+ }
+
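+ // Four register argument slots, matching the Windows x64 convention, where integer
+ // and vector arguments share the same four positional slots.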
+ public static int GetArgumentsOnRegsCount()
+ {
+ return 4;
+ }
+
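+ // The System V AMD64 ABI passes up to six integer arguments in registers
+ // (RDI, RSI, RDX, RCX, R8, R9).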
+ public static int GetIntArgumentsOnRegsCount()
+ {
+ return 6;
+ }
+
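+ // The System V AMD64 ABI passes up to eight vector arguments in registers (XMM0-XMM7).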
+ public static int GetVecArgumentsOnRegsCount()
+ {
+ return 8;
+ }
+
+ public static X86Register GetIntArgumentRegister(int index)
+ {
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ switch (index)
+ {
+ case 0: return X86Register.Rcx;
+ case 1: return X86Register.Rdx;
+ case 2: return X86Register.R8;
+ case 3: return X86Register.R9;
+ }
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ switch (index)
+ {
+ case 0: return X86Register.Rdi;
+ case 1: return X86Register.Rsi;
+ case 2: return X86Register.Rdx;
+ case 3: return X86Register.Rcx;
+ case 4: return X86Register.R8;
+ case 5: return X86Register.R9;
+ }
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static X86Register GetVecArgumentRegister(int index)
+ {
+ int count;
+
+ if (GetCurrentCallConv() == CallConvName.Windows)
+ {
+ count = 4;
+ }
+ else /* if (GetCurrentCallConv() == CallConvName.SystemV) */
+ {
+ count = 8;
+ }
+
+ if ((uint)index < count)
+ {
+ return X86Register.Xmm0 + index;
+ }
+
+ throw new ArgumentOutOfRangeException(nameof(index));
+ }
+
+ public static X86Register GetIntReturnRegister()
+ {
+ return X86Register.Rax;
+ }
+
+ public static X86Register GetIntReturnRegisterHigh()
+ {
+ return X86Register.Rdx;
+ }
+
+ public static X86Register GetVecReturnRegister()
+ {
+ return X86Register.Xmm0;
+ }
+
+ public static CallConvName GetCurrentCallConv()
+ {
+ return OperatingSystem.IsWindows()
+ ? CallConvName.Windows
+ : CallConvName.SystemV;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs b/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs
new file mode 100644
index 00000000..237ecee4
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs
@@ -0,0 +1,19 @@
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CodeGenCommon
+ {
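+ // Returns true when the constant cannot be encoded as a sign-extended 32-bit
+ // immediate, the widest immediate form most x86-64 instructions accept.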
+ public static bool IsLongConst(Operand op)
+ {
+ long value = op.Type == OperandType.I32 ? op.AsInt32() : op.AsInt64();
+
+ return !ConstFitsOnS32(value);
+ }
+
+ private static bool ConstFitsOnS32(long value)
+ {
+ return value == (int)value;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs
new file mode 100644
index 00000000..89948724
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs
@@ -0,0 +1,105 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using Ryujinx.Common.Memory;
+using System.IO;
+using System.Numerics;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class CodeGenContext
+ {
+ private readonly Stream _stream;
+ private readonly Operand[] _blockLabels;
+
+ public int StreamOffset => (int)_stream.Length;
+
+ public AllocationResult AllocResult { get; }
+
+ public Assembler Assembler { get; }
+ public BasicBlock CurrBlock { get; private set; }
+
+ public int CallArgsRegionSize { get; }
+ public int XmmSaveRegionSize { get; }
+
+ public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable)
+ {
+ _stream = MemoryStreamManager.Shared.GetStream();
+ _blockLabels = new Operand[blocksCount];
+
+ AllocResult = allocResult;
+ Assembler = new Assembler(_stream, relocatable);
+
+ CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize);
+ XmmSaveRegionSize = xmmSaveRegionSize;
+ }
+
+ private static int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize)
+ {
+ // We need to add 8 bytes to the total size, as the call to this function already pushed 8 bytes (the
+ // return address).
+ int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters;
+ int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters;
+
+ xmmSaveRegionSize = BitOperations.PopCount((uint)vecMask) * 16;
+
+ int calleeSaveRegionSize = BitOperations.PopCount((uint)intMask) * 8 + xmmSaveRegionSize + 8;
+
+ int argsCount = maxCallArgs;
+
+ if (argsCount < 0)
+ {
+ // When the function has no calls, argsCount is -1. In this case, we don't need to allocate the shadow
+ // space.
+ argsCount = 0;
+ }
+ else if (argsCount < 4)
+ {
+ // The ABI mandates that the space for at least 4 arguments is reserved on the stack (this is called
+ // shadow space).
+ argsCount = 4;
+ }
+
+ // TODO: Align XMM save region to 16 bytes because unwinding on Windows requires it.
+ int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize;
+
+ // TODO: Instead of always multiplying by 16 (the largest possible size of a variable, since a V128 has 16
+ // bytes), we should calculate the exact size consumed by the arguments passed to the called functions on
+ // the stack.
+ int callArgsAndFrameSize = frameSize + argsCount * 16;
+
+ // Ensure that the Stack Pointer will be aligned to 16 bytes.
+ callArgsAndFrameSize = (callArgsAndFrameSize + 0xf) & ~0xf;
+
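+ // Only the call arguments region (including any alignment padding) is returned here;
+ // the callee-save and spill regions are accounted for when the prologue and epilogue
+ // are written.
+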
+ return callArgsAndFrameSize - frameSize;
+ }
+
+ public void EnterBlock(BasicBlock block)
+ {
+ Assembler.MarkLabel(GetLabel(block));
+
+ CurrBlock = block;
+ }
+
+ public void JumpTo(BasicBlock target)
+ {
+ Assembler.Jmp(GetLabel(target));
+ }
+
+ public void JumpTo(X86Condition condition, BasicBlock target)
+ {
+ Assembler.Jcc(condition, GetLabel(target));
+ }
+
+ private Operand GetLabel(BasicBlock block)
+ {
+ ref Operand label = ref _blockLabels[block.Index];
+
+ if (label == default)
+ {
+ label = Operand.Factory.Label();
+ }
+
+ return label;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs
new file mode 100644
index 00000000..e7179b51
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs
@@ -0,0 +1,1865 @@
+using ARMeilleure.CodeGen.Linking;
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.CodeGen.Unwinding;
+using ARMeilleure.Common;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class CodeGenerator
+ {
+ private const int RegistersCount = 16;
+ private const int PageSize = 0x1000;
+ private const int StackGuardSize = 0x2000;
+
+ private static readonly Action<CodeGenContext, Operation>[] _instTable;
+
+ static CodeGenerator()
+ {
+ _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))];
+
+ Add(Instruction.Add, GenerateAdd);
+ Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+ Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+ Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+ Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+ Add(Instruction.BranchIf, GenerateBranchIf);
+ Add(Instruction.ByteSwap, GenerateByteSwap);
+ Add(Instruction.Call, GenerateCall);
+ Add(Instruction.Clobber, GenerateClobber);
+ Add(Instruction.Compare, GenerateCompare);
+ Add(Instruction.CompareAndSwap, GenerateCompareAndSwap);
+ Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16);
+ Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8);
+ Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+ Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32);
+ Add(Instruction.ConvertToFP, GenerateConvertToFP);
+ Add(Instruction.Copy, GenerateCopy);
+ Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros);
+ Add(Instruction.Divide, GenerateDivide);
+ Add(Instruction.DivideUI, GenerateDivideUI);
+ Add(Instruction.Fill, GenerateFill);
+ Add(Instruction.Load, GenerateLoad);
+ Add(Instruction.Load16, GenerateLoad16);
+ Add(Instruction.Load8, GenerateLoad8);
+ Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+ Add(Instruction.Multiply, GenerateMultiply);
+ Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI);
+ Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI);
+ Add(Instruction.Negate, GenerateNegate);
+ Add(Instruction.Return, GenerateReturn);
+ Add(Instruction.RotateRight, GenerateRotateRight);
+ Add(Instruction.ShiftLeft, GenerateShiftLeft);
+ Add(Instruction.ShiftRightSI, GenerateShiftRightSI);
+ Add(Instruction.ShiftRightUI, GenerateShiftRightUI);
+ Add(Instruction.SignExtend16, GenerateSignExtend16);
+ Add(Instruction.SignExtend32, GenerateSignExtend32);
+ Add(Instruction.SignExtend8, GenerateSignExtend8);
+ Add(Instruction.Spill, GenerateSpill);
+ Add(Instruction.SpillArg, GenerateSpillArg);
+ Add(Instruction.StackAlloc, GenerateStackAlloc);
+ Add(Instruction.Store, GenerateStore);
+ Add(Instruction.Store16, GenerateStore16);
+ Add(Instruction.Store8, GenerateStore8);
+ Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.Tailcall, GenerateTailcall);
+ Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar);
+ Add(Instruction.VectorExtract, GenerateVectorExtract);
+ Add(Instruction.VectorExtract16, GenerateVectorExtract16);
+ Add(Instruction.VectorExtract8, GenerateVectorExtract8);
+ Add(Instruction.VectorInsert, GenerateVectorInsert);
+ Add(Instruction.VectorInsert16, GenerateVectorInsert16);
+ Add(Instruction.VectorInsert8, GenerateVectorInsert8);
+ Add(Instruction.VectorOne, GenerateVectorOne);
+ Add(Instruction.VectorZero, GenerateVectorZero);
+ Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64);
+ Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96);
+ Add(Instruction.ZeroExtend16, GenerateZeroExtend16);
+ Add(Instruction.ZeroExtend32, GenerateZeroExtend32);
+ Add(Instruction.ZeroExtend8, GenerateZeroExtend8);
+
+ static void Add(Instruction inst, Action<CodeGenContext, Operation> func)
+ {
+ _instTable[(int)inst] = func;
+ }
+ }
+
+ public static CompiledFunction Generate(CompilerContext cctx)
+ {
+ ControlFlowGraph cfg = cctx.Cfg;
+
+ Logger.StartPass(PassName.Optimization);
+
+ if (cctx.Options.HasFlag(CompilerOptions.Optimize))
+ {
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Optimizer.RunPass(cfg);
+ }
+
+ BlockPlacement.RunPass(cfg);
+ }
+
+ X86Optimizer.RunPass(cfg);
+
+ Logger.EndPass(PassName.Optimization, cfg);
+
+ Logger.StartPass(PassName.PreAllocation);
+
+ StackAllocator stackAlloc = new();
+
+ PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs);
+
+ Logger.EndPass(PassName.PreAllocation, cfg);
+
+ Logger.StartPass(PassName.RegisterAllocation);
+
+ if (cctx.Options.HasFlag(CompilerOptions.SsaForm))
+ {
+ Ssa.Deconstruct(cfg);
+ }
+
+ IRegisterAllocator regAlloc;
+
+ if (cctx.Options.HasFlag(CompilerOptions.Lsra))
+ {
+ regAlloc = new LinearScanAllocator();
+ }
+ else
+ {
+ regAlloc = new HybridAllocator();
+ }
+
+ RegisterMasks regMasks = new(
+ CallingConvention.GetIntAvailableRegisters(),
+ CallingConvention.GetVecAvailableRegisters(),
+ CallingConvention.GetIntCallerSavedRegisters(),
+ CallingConvention.GetVecCallerSavedRegisters(),
+ CallingConvention.GetIntCalleeSavedRegisters(),
+ CallingConvention.GetVecCalleeSavedRegisters(),
+ RegistersCount);
+
+ AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks);
+
+ Logger.EndPass(PassName.RegisterAllocation, cfg);
+
+ Logger.StartPass(PassName.CodeGeneration);
+
+ bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
+
+ CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
+
+ UnwindInfo unwindInfo = WritePrologue(context);
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ context.EnterBlock(block);
+
+ for (Operation node = block.Operations.First; node != default; node = node.ListNext)
+ {
+ GenerateOperation(context, node);
+ }
+
+ if (block.SuccessorsCount == 0)
+ {
+ // The only blocks which can have 0 successors are exit blocks.
+ Operation last = block.Operations.Last;
+
+ Debug.Assert(last.Instruction == Instruction.Tailcall ||
+ last.Instruction == Instruction.Return);
+ }
+ else
+ {
+ BasicBlock succ = block.GetSuccessor(0);
+
+ if (succ != block.ListNext)
+ {
+ context.JumpTo(succ);
+ }
+ }
+ }
+
+ (byte[] code, RelocInfo relocInfo) = context.Assembler.GetCode();
+
+ Logger.EndPass(PassName.CodeGeneration);
+
+ return new CompiledFunction(code, unwindInfo, relocInfo);
+ }
+
+ private static void GenerateOperation(CodeGenContext context, Operation operation)
+ {
+ if (operation.Instruction == Instruction.Extended)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ switch (info.Type)
+ {
+ case IntrinsicType.Comis_:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ switch (operation.Intrinsic)
+ {
+ case Intrinsic.X86Comisdeq:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Equal);
+ break;
+
+ case Intrinsic.X86Comisdge:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.AboveOrEqual);
+ break;
+
+ case Intrinsic.X86Comisdlt:
+ context.Assembler.Comisd(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Below);
+ break;
+
+ case Intrinsic.X86Comisseq:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Equal);
+ break;
+
+ case Intrinsic.X86Comissge:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.AboveOrEqual);
+ break;
+
+ case Intrinsic.X86Comisslt:
+ context.Assembler.Comiss(src1, src2);
+ context.Assembler.Setcc(dest, X86Condition.Below);
+ break;
+ }
+
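+ // SETcc writes only the low byte of the destination, so zero-extend it to
+ // produce a clean 32-bit result.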
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+
+ break;
+ }
+
+ case IntrinsicType.Mxcsr:
+ {
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+ Debug.Assert(offset.Type == OperandType.I32);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+ Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, offs);
+
+ Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding);
+
+ if (operation.Intrinsic == Intrinsic.X86Ldmxcsr)
+ {
+ Operand bits = operation.GetSource(1);
+ Debug.Assert(bits.Type == OperandType.I32);
+
+ context.Assembler.Mov(memOp, bits, OperandType.I32);
+ context.Assembler.Ldmxcsr(memOp);
+ }
+ else if (operation.Intrinsic == Intrinsic.X86Stmxcsr)
+ {
+ Operand dest = operation.Destination;
+ Debug.Assert(dest.Type == OperandType.I32);
+
+ context.Assembler.Stmxcsr(memOp);
+ context.Assembler.Mov(dest, memOp, OperandType.I32);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.PopCount:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Popcnt(dest, source, dest.Type);
+
+ break;
+ }
+
+ case IntrinsicType.Unary:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, source);
+
+ break;
+ }
+
+ case IntrinsicType.UnaryToGpr:
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger());
+
+ if (operation.Intrinsic == Intrinsic.X86Cvtsi2si)
+ {
+ if (dest.Type == OperandType.I32)
+ {
+ context.Assembler.Movd(dest, source); // int _mm_cvtsi128_si32(__m128i a)
+ }
+ else /* if (dest.Type == OperandType.I64) */
+ {
+ context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64(__m128i a)
+ }
+ }
+ else
+ {
+ context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.Binary:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger());
+ Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+
+ break;
+ }
+
+ case IntrinsicType.BinaryGpr:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src2.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src2.Type);
+
+ break;
+ }
+
+ case IntrinsicType.Crc32:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src1.Type.IsInteger() && src2.Type.IsInteger());
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src2, dest.Type);
+
+ break;
+ }
+
+ case IntrinsicType.BinaryImm:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(dest, src1);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte());
+
+ break;
+ }
+
+ case IntrinsicType.Ternary:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src1, src2, src3);
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ if (info.Inst == X86Instruction.Blendvpd && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vblendvpd, dest, src1, src2, src3);
+ }
+ else if (info.Inst == X86Instruction.Blendvps && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vblendvps, dest, src1, src2, src3);
+ }
+ else if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding)
+ {
+ context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3);
+ }
+ else
+ {
+ EnsureSameReg(dest, src1);
+
+ Debug.Assert(src3.GetRegister().Index == 0);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2);
+ }
+
+ break;
+ }
+
+ case IntrinsicType.TernaryImm:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(dest, src1, src2);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte());
+
+ break;
+ }
+
+ case IntrinsicType.Fma:
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ Debug.Assert(HardwareCapabilities.SupportsVexEncoding);
+
+ Debug.Assert(dest.Kind == OperandKind.Register && src1.Kind == OperandKind.Register && src2.Kind == OperandKind.Register);
+ Debug.Assert(src3.Kind == OperandKind.Register || src3.Kind == OperandKind.Memory);
+
+ EnsureSameType(dest, src1, src2, src3);
+ Debug.Assert(dest.Type == OperandType.V128);
+
+ Debug.Assert(dest.Value == src1.Value);
+
+ context.Assembler.WriteInstruction(info.Inst, dest, src2, src3);
+
+ break;
+ }
+ }
+ }
+ else
+ {
+ Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction];
+
+ if (func != null)
+ {
+ func(context, operation);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\".");
+ }
+ }
+ }
+
+ private static void GenerateAdd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ if (dest.Type.IsInteger())
+ {
+ // If the destination and source 1 operands are the same, perform a standard add, as LEA offers no benefit here.
+ if (dest.Kind == src1.Kind && dest.Value == src1.Value)
+ {
+ ValidateBinOp(dest, src1, src2);
+
+ context.Assembler.Add(dest, src2, dest.Type);
+ }
+ else
+ {
+ EnsureSameType(dest, src1, src2);
+
+ int offset;
+ Operand index;
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ offset = src2.AsInt32();
+ index = default;
+ }
+ else
+ {
+ offset = 0;
+ index = src2;
+ }
+
+ Operand memOp = MemoryOp(dest.Type, src1, index, Multiplier.x1, offset);
+
+ context.Assembler.Lea(dest, memOp, dest.Type);
+ }
+ }
+ else
+ {
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Addss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Addsd(dest, src1, src2);
+ }
+ }
+ }
+
+ private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ // Note: GenerateCompareCommon makes the assumption that BitwiseAnd will emit only a single `and`
+ // instruction.
+ context.Assembler.And(dest, src2, dest.Type);
+ }
+
+ private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Xor(dest, src2, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Xorps(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Not(dest);
+ }
+
+ private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Or(dest, src2, dest.Type);
+ }
+
+ private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToX86Condition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.JumpTo(cond, context.CurrBlock.GetSuccessor(1));
+ }
+
+ private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Bswap(dest);
+ }
+
+ private static void GenerateCall(CodeGenContext context, Operation operation)
+ {
+ context.Assembler.Call(operation.GetSource(0));
+ }
+
+ private static void GenerateClobber(CodeGenContext context, Operation operation)
+ {
+ // This is only used to indicate to the register allocator that a
+ // register is clobbered; we don't need to produce any code.
+ }
+
+ private static void GenerateCompare(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(dest.Type == OperandType.I32);
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var cond = ((Comparison)comp.AsInt32()).ToX86Condition();
+
+ GenerateCompareCommon(context, operation);
+
+ context.Assembler.Setcc(dest, cond);
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+ }
+
+ private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ EnsureSameType(src1, src2);
+
+ Debug.Assert(src1.Type.IsInteger());
+
+ if (src2.Kind == OperandKind.Constant && src2.Value == 0)
+ {
+ if (MatchOperation(operation.ListPrevious, Instruction.BitwiseAnd, src1.Type, src1.GetRegister()))
+ {
+ // Since the `test` and `and` instruction set the status flags in the same way, we can omit the
+ // `test r,r` instruction when it is immediately preceded by an `and r,*` instruction.
+ //
+ // For example:
+ //
+ // and eax, 0x3
+ // test eax, eax
+ // jz .L0
+ //
+ // =>
+ //
+ // and eax, 0x3
+ // jz .L0
+ }
+ else
+ {
+ context.Assembler.Test(src1, src1, src1.Type);
+ }
+ }
+ else
+ {
+ context.Assembler.Cmp(src1, src2, src1.Type);
+ }
+ }
+
+ private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+
+ if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3.
+ {
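+ // CMPXCHG16B implicitly takes the compare value in RDX:RAX and the new value
+ // in RCX:RBX; those fixed register assignments are expected to have been set
+ // up before register allocation.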
+ Operand memOp = MemoryOp(OperandType.I64, src1);
+
+ context.Assembler.Cmpxchg16b(memOp);
+ }
+ else
+ {
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg(memOp, src3);
+ }
+ }
+
+ private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg16(memOp, src3);
+ }
+
+ private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation)
+ {
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameType(src2, src3);
+
+ Operand memOp = MemoryOp(src3.Type, src1);
+
+ context.Assembler.Cmpxchg8(memOp, src3);
+ }
+
+ private static void GenerateConditionalSelect(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ EnsureSameReg (dest, src3);
+ EnsureSameType(dest, src2, src3);
+
+ Debug.Assert(dest.Type.IsInteger());
+ Debug.Assert(src1.Type == OperandType.I32);
+
+ context.Assembler.Test (src1, src1, src1.Type);
+ context.Assembler.Cmovcc(dest, src2, dest.Type, X86Condition.NotEqual);
+ }
+
+ private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64);
+
+ context.Assembler.Mov(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateConvertToFP(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64);
+
+ if (dest.Type == OperandType.FP32)
+ {
+ Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP64);
+
+ if (source.Type.IsInteger())
+ {
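+ // Zero the destination first: CVTSI2SS only writes the low 32 bits, so this
+ // clears the upper elements and avoids a false dependency on the old value.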
+ context.Assembler.Xorps (dest, dest, dest);
+ context.Assembler.Cvtsi2ss(dest, dest, source, source.Type);
+ }
+ else /* if (source.Type == OperandType.FP64) */
+ {
+ context.Assembler.Cvtsd2ss(dest, dest, source);
+
+ GenerateZeroUpper96(context, dest, dest);
+ }
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP32);
+
+ if (source.Type.IsInteger())
+ {
+ context.Assembler.Xorps (dest, dest, dest);
+ context.Assembler.Cvtsi2sd(dest, dest, source, source.Type);
+ }
+ else /* if (source.Type == OperandType.FP32) */
+ {
+ context.Assembler.Cvtss2sd(dest, dest, source);
+
+ GenerateZeroUpper64(context, dest, dest);
+ }
+ }
+ }
+
+ private static void GenerateCopy(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
+
+ // Moves to the same register are useless.
+ if (dest.Kind == source.Kind && dest.Value == source.Value)
+ {
+ return;
+ }
+
+ if (dest.Kind == OperandKind.Register &&
+ source.Kind == OperandKind.Constant && source.Value == 0)
+ {
+ // Assemble "mov reg, 0" as "xor reg, reg" as the later is more efficient.
+ context.Assembler.Xor(dest, dest, OperandType.I32);
+ }
+ else if (dest.Type.IsInteger())
+ {
+ context.Assembler.Mov(dest, source, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Movdqu(dest, source);
+ }
+ }
+
+ private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ EnsureSameType(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Bsr(dest, source, dest.Type);
+
+ int operandSize = dest.Type == OperandType.I32 ? 32 : 64;
+ int operandMask = operandSize - 1;
+
+ // When the input operand is 0, the result is undefined; however, the
+ // ZF flag is set. We are supposed to return the operand size in that
+ // case, so add an additional branch that moves the operand size
+ // constant to the destination register.
+ Operand neLabel = Label();
+
+ context.Assembler.Jcc(X86Condition.NotEqual, neLabel);
+
+ context.Assembler.Mov(dest, Const(operandSize | operandMask), OperandType.I32);
+
+ context.Assembler.MarkLabel(neLabel);
+
+ // BSR returns the zero-based index of the last set bit in the operand,
+ // counting from the least significant bit. However, we are supposed to
+ // return the number of 0 bits on the high end. So, we invert the result
+ // of the BSR using XOR to get the correct value.
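+ // For example, a 32-bit input of 0x000000f0 gives BSR = 7 (the highest set
+ // bit), and 7 ^ 31 = 24, the number of leading zeros.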
+ context.Assembler.Xor(dest, Const(operandMask), OperandType.I32);
+ }
+
+ private static void GenerateDivide(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand dividend = operation.GetSource(0);
+ Operand divisor = operation.GetSource(1);
+
+ if (!dest.Type.IsInteger())
+ {
+ ValidateBinOp(dest, dividend, divisor);
+ }
+
+ if (dest.Type.IsInteger())
+ {
+ divisor = operation.GetSource(2);
+
+ EnsureSameType(dest, divisor);
+
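+ // Sign-extend RAX into RDX (CDQ/CQO) so that IDIV sees the full dividend in RDX:RAX.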
+ if (divisor.Type == OperandType.I32)
+ {
+ context.Assembler.Cdq();
+ }
+ else
+ {
+ context.Assembler.Cqo();
+ }
+
+ context.Assembler.Idiv(divisor);
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Divss(dest, dividend, divisor);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Divsd(dest, dividend, divisor);
+ }
+ }
+
+ private static void GenerateDivideUI(CodeGenContext context, Operation operation)
+ {
+ Operand divisor = operation.GetSource(2);
+
+ Operand rdx = Register(X86Register.Rdx);
+
+ Debug.Assert(divisor.Type.IsInteger());
+
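+ // Zero the upper half of the dividend: unsigned DIV divides RDX:RAX by the divisor.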
+ context.Assembler.Xor(rdx, rdx, OperandType.I32);
+ context.Assembler.Div(divisor);
+ }
+
+ private static void GenerateFill(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(dest.Type, rsp, default, Multiplier.x1, offs);
+
+ GenerateLoad(context, memOp, dest);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ GenerateLoad(context, address, value);
+ }
+
+ private static void GenerateLoad16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Movzx16(value, address, value.Type);
+ }
+
+ private static void GenerateLoad8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.Destination;
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Movzx8(value, address, value.Type);
+ }
+
+ private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation)
+ {
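+ // A locked read-modify-write on the stack acts as a full memory barrier and is
+ // generally cheaper than MFENCE.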
+ context.Assembler.LockOr(MemoryOp(OperandType.I64, Register(X86Register.Rsp)), Const(0), OperandType.I32);
+ }
+
+ private static void GenerateMultiply(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ if (src2.Kind != OperandKind.Constant)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ EnsureSameType(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ if (src2.Kind == OperandKind.Constant)
+ {
+ context.Assembler.Imul(dest, src1, src2, dest.Type);
+ }
+ else
+ {
+ context.Assembler.Imul(dest, src2, dest.Type);
+ }
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Mulss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Mulsd(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation)
+ {
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(source.Type == OperandType.I64);
+
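+ // One-operand IMUL multiplies RAX by the source and stores the 128-bit result
+ // in RDX:RAX; the high half in RDX is the value this operation produces.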
+ context.Assembler.Imul(source);
+ }
+
+ private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation)
+ {
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(source.Type == OperandType.I64);
+
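+ // One-operand MUL likewise leaves the unsigned high half of the product in RDX.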
+ context.Assembler.Mul(source);
+ }
+
+ private static void GenerateNegate(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ ValidateUnOp(dest, source);
+
+ Debug.Assert(dest.Type.IsInteger());
+
+ context.Assembler.Neg(dest);
+ }
+
+ private static void GenerateReturn(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Return();
+ }
+
+ private static void GenerateRotateRight(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Ror(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftLeft(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Shl(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftRightSI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Sar(dest, src2, dest.Type);
+ }
+
+ private static void GenerateShiftRightUI(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateShift(dest, src1, src2);
+
+ context.Assembler.Shr(dest, src2, dest.Type);
+ }
+
+ private static void GenerateSignExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx16(dest, source, dest.Type);
+ }
+
+ private static void GenerateSignExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx32(dest, source, dest.Type);
+ }
+
+ private static void GenerateSignExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movsx8(dest, source, dest.Type);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, context.CallArgsRegionSize);
+ }
+
+ private static void GenerateSpillArg(CodeGenContext context, Operation operation)
+ {
+ GenerateSpill(context, operation, 0);
+ }
+
+ private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset)
+ {
+ Operand offset = operation.GetSource(0);
+ Operand source = operation.GetSource(1);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + baseOffset;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(source.Type, rsp, default, Multiplier.x1, offs);
+
+ GenerateStore(context, memOp, source);
+ }
+
+ private static void GenerateStackAlloc(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand offset = operation.GetSource(0);
+
+ Debug.Assert(offset.Kind == OperandKind.Constant);
+
+ int offs = offset.AsInt32() + context.CallArgsRegionSize;
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ Operand memOp = MemoryOp(OperandType.I64, rsp, default, Multiplier.x1, offs);
+
+ context.Assembler.Lea(dest, memOp, OperandType.I64);
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ GenerateStore(context, address, value);
+ }
+
+ private static void GenerateStore16(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Mov16(address, value);
+ }
+
+ private static void GenerateStore8(CodeGenContext context, Operation operation)
+ {
+ Operand value = operation.GetSource(1);
+ Operand address = Memory(operation.GetSource(0), value.Type);
+
+ Debug.Assert(value.Type.IsInteger());
+
+ context.Assembler.Mov8(address, value);
+ }
+
+ private static void GenerateSubtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ ValidateBinOp(dest, src1, src2);
+
+ if (dest.Type.IsInteger())
+ {
+ context.Assembler.Sub(dest, src2, dest.Type);
+ }
+ else if (dest.Type == OperandType.FP32)
+ {
+ context.Assembler.Subss(dest, src1, src2);
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ context.Assembler.Subsd(dest, src1, src2);
+ }
+ }
+
+ private static void GenerateTailcall(CodeGenContext context, Operation operation)
+ {
+ WriteEpilogue(context);
+
+ context.Assembler.Jmp(operation.GetSource(0));
+ }
+
+ private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger());
+
+ if (source.Type == OperandType.I32)
+ {
+ context.Assembler.Movd(dest, source); // (__m128i _mm_cvtsi32_si128(int a))
+ }
+ else /* if (source.Type == OperandType.I64) */
+ {
+ context.Assembler.Movq(dest, source); // (__m128i _mm_cvtsi64_si128(__int64 a))
+ }
+ }
+
+ private static void GenerateVectorExtract(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes());
+
+ if (dest.Type == OperandType.I32)
+ {
+ if (index == 0)
+ {
+ context.Assembler.Movd(dest, src1);
+ }
+ else if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrd(dest, src1, index);
+ }
+ else
+ {
+ int mask0 = 0b11_10_01_00;
+ int mask1 = 0b11_10_01_00;
+
+ mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+ mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
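+ // mask0 rotates lane `index` down to element 0 so MOVD can read it; mask1 is
+ // the inverse rotation, restoring the original lane order afterwards.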
+ context.Assembler.Pshufd(src1, src1, (byte)mask0);
+ context.Assembler.Movd (dest, src1);
+ context.Assembler.Pshufd(src1, src1, (byte)mask1);
+ }
+ }
+ else if (dest.Type == OperandType.I64)
+ {
+ if (index == 0)
+ {
+ context.Assembler.Movq(dest, src1);
+ }
+ else if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrq(dest, src1, index);
+ }
+ else
+ {
+ const byte mask = 0b01_00_11_10;
+
+ context.Assembler.Pshufd(src1, src1, mask);
+ context.Assembler.Movq (dest, src1);
+ context.Assembler.Pshufd(src1, src1, mask);
+ }
+ }
+ else
+ {
+ // Floating-point types.
+ if ((index >= 2 && dest.Type == OperandType.FP32) ||
+ (index == 1 && dest.Type == OperandType.FP64))
+ {
+ context.Assembler.Movhlps(dest, dest, src1);
+ context.Assembler.Movq (dest, dest);
+ }
+ else
+ {
+ context.Assembler.Movq(dest, src1);
+ }
+
+ if (dest.Type == OperandType.FP32)
+ {
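+ // 0xfc | (index & 1) selects element (index & 1) into lane 0; the upper
+ // lanes receive element 3, which the preceding MOVQ has already zeroed.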
+ context.Assembler.Pshufd(dest, dest, (byte)(0xfc | (index & 1)));
+ }
+ }
+ }
+
+ private static void GenerateVectorExtract16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 8);
+
+ context.Assembler.Pextrw(dest, src1, index);
+ }
+
+ private static void GenerateVectorExtract8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination; // Value
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Index
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src2.Kind == OperandKind.Constant);
+
+ byte index = src2.AsByte();
+
+ Debug.Assert(index < 16);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pextrb(dest, src1, index);
+ }
+ else
+ {
+ context.Assembler.Pextrw(dest, src1, (byte)(index >> 1));
+
+ if ((index & 1) != 0)
+ {
+ context.Assembler.Shr(dest, Const(8), OperandType.I32);
+ }
+ else
+ {
+ context.Assembler.Movzx8(dest, dest, OperandType.I32);
+ }
+ }
+ }
+
+ private static void GenerateVectorInsert(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand src1 = operation.GetSource(0); // Vector
+ Operand src2 = operation.GetSource(1); // Value
+ Operand src3 = operation.GetSource(2); // Index
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ void InsertIntSse2(int words)
+ {
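+                // Without PINSRD/PINSRQ, build the insert out of 16-bit PINSRW
+                // stores, rotating the value so each word lands in place; after
+                // `words` rotations the value in src2 is back in its original form.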
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Movdqu(dest, src1);
+ }
+
+ for (int word = 0; word < words; word++)
+ {
+                    // Insert the lower 16 bits of the value.
+ context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word));
+
+ // Move next word down.
+ context.Assembler.Ror(src2, Const(16), src2.Type);
+ }
+ }
+
+ if (src2.Type == OperandType.I32)
+ {
+ Debug.Assert(index < 4);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrd(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(2);
+ }
+ }
+ else if (src2.Type == OperandType.I64)
+ {
+ Debug.Assert(index < 2);
+
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Pinsrq(dest, src1, src2, index);
+ }
+ else
+ {
+ InsertIntSse2(4);
+ }
+ }
+ else if (src2.Type == OperandType.FP32)
+ {
+ Debug.Assert(index < 4);
+
+ if (index != 0)
+ {
+ if (HardwareCapabilities.SupportsSse41)
+ {
+ context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4));
+ }
+ else
+ {
+ if (src1.GetRegister() == src2.GetRegister())
+ {
+ int mask = 0b11_10_01_00;
+
+ mask &= ~(0b11 << index * 2);
+
+ context.Assembler.Pshufd(dest, src1, (byte)mask);
+ }
+ else
+ {
+ int mask0 = 0b11_10_01_00;
+ int mask1 = 0b11_10_01_00;
+
+ mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+ mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
+ context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0.
+ context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0]
+ context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position.
+
+ if (dest.GetRegister() != src1.GetRegister())
+ {
+ context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1.
+ }
+ }
+ }
+ }
+ else
+ {
+ context.Assembler.Movss(dest, src1, src2);
+ }
+ }
+ else /* if (src2.Type == OperandType.FP64) */
+ {
+ Debug.Assert(index < 2);
+
+ if (index != 0)
+ {
+ context.Assembler.Movlhps(dest, src1, src2);
+ }
+ else
+ {
+ context.Assembler.Movsd(dest, src1, src2);
+ }
+ }
+ }
+
+ private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Pinsrw(dest, src1, src2, index);
+ }
+
+ private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); // Vector
+            Operand src2 = operation.GetSource(1); // Value
+            Operand src3 = operation.GetSource(2); // Index
+
+            // Without SSE 4.1 this instruction cannot be emulated here, as doing
+            // so would require a temporary register, so the pre-allocator lowers
+            // it instead when SSE 4.1 is not supported by the CPU.
+ Debug.Assert(HardwareCapabilities.SupportsSse41);
+
+ if (!HardwareCapabilities.SupportsVexEncoding)
+ {
+ EnsureSameReg(dest, src1);
+ }
+
+ Debug.Assert(src1.Type == OperandType.V128);
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ context.Assembler.Pinsrb(dest, src1, src2, index);
+ }
+
+ private static void GenerateVectorOne(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.Pcmpeqw(dest, dest, dest);
+ }
+
+ private static void GenerateVectorZero(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+
+ Debug.Assert(!dest.Type.IsInteger());
+
+ context.Assembler.Xorps(dest, dest, dest);
+ }
+
+ private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ GenerateZeroUpper64(context, dest, source);
+ }
+
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128);
+
+ GenerateZeroUpper96(context, dest, source);
+ }
+
+ private static void GenerateZeroExtend16(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movzx16(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateZeroExtend32(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+            // We can eliminate the move if the source is already 32-bit and the registers are the same.
+ if (dest.Value == source.Value && source.Type == OperandType.I32)
+ {
+ return;
+ }
+
+ context.Assembler.Mov(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateZeroExtend8(CodeGenContext context, Operation operation)
+ {
+ Operand dest = operation.Destination;
+ Operand source = operation.GetSource(0);
+
+ Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger());
+
+ context.Assembler.Movzx8(dest, source, OperandType.I32);
+ }
+
+ private static void GenerateLoad(CodeGenContext context, Operand address, Operand value)
+ {
+ switch (value.Type)
+ {
+ case OperandType.I32: context.Assembler.Mov (value, address, OperandType.I32); break;
+ case OperandType.I64: context.Assembler.Mov (value, address, OperandType.I64); break;
+ case OperandType.FP32: context.Assembler.Movd (value, address); break;
+ case OperandType.FP64: context.Assembler.Movq (value, address); break;
+ case OperandType.V128: context.Assembler.Movdqu(value, address); break;
+
+ default: Debug.Assert(false); break;
+ }
+ }
+
+ private static void GenerateStore(CodeGenContext context, Operand address, Operand value)
+ {
+ switch (value.Type)
+ {
+ case OperandType.I32: context.Assembler.Mov (address, value, OperandType.I32); break;
+ case OperandType.I64: context.Assembler.Mov (address, value, OperandType.I64); break;
+ case OperandType.FP32: context.Assembler.Movd (address, value); break;
+ case OperandType.FP64: context.Assembler.Movq (address, value); break;
+ case OperandType.V128: context.Assembler.Movdqu(address, value); break;
+
+ default: Debug.Assert(false); break;
+ }
+ }
+
+ private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source)
+ {
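+            // A register-to-register MOVQ zeroes bits 127:64 of the destination.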
+ context.Assembler.Movq(dest, source);
+ }
+
+ private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source)
+ {
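+            // MOVQ zeroes bits 127:64; the PSHUFD control 0xfc (0b11_11_11_00)
+            // then fills elements 1-3 from the (now zero) top lane, keeping
+            // only element 0 of the source.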
+ context.Assembler.Movq(dest, source);
+ context.Assembler.Pshufd(dest, dest, 0xfc);
+ }
+
+ private static bool MatchOperation(Operation node, Instruction inst, OperandType destType, Register destReg)
+ {
+ if (node == default || node.DestinationsCount == 0)
+ {
+ return false;
+ }
+
+ if (node.Instruction != inst)
+ {
+ return false;
+ }
+
+ Operand dest = node.Destination;
+
+ return dest.Kind == OperandKind.Register &&
+ dest.Type == destType &&
+ dest.GetRegister() == destReg;
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateUnOp(Operand dest, Operand source)
+ {
+ EnsureSameReg (dest, source);
+ EnsureSameType(dest, source);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+ {
+ EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1, src2);
+ }
+
+ [Conditional("DEBUG")]
+ private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+ {
+ EnsureSameReg (dest, src1);
+ EnsureSameType(dest, src1);
+
+ Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+ }
+
+ private static void EnsureSameReg(Operand op1, Operand op2)
+ {
+ if (!op1.Type.IsInteger() && HardwareCapabilities.SupportsVexEncoding)
+ {
+ return;
+ }
+
+ Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+ Debug.Assert(op1.Kind == op2.Kind);
+ Debug.Assert(op1.Value == op2.Value);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ }
+
+ private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4)
+ {
+ Debug.Assert(op1.Type == op2.Type);
+ Debug.Assert(op1.Type == op3.Type);
+ Debug.Assert(op1.Type == op4.Type);
+ }
+
+ private static UnwindInfo WritePrologue(CodeGenContext context)
+ {
+ List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>();
+
+ Operand rsp = Register(X86Register.Rsp);
+
+ int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ context.Assembler.Push(Register((X86Register)bit));
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: bit));
+
+ mask &= ~(1 << bit);
+ }
+
+ int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+ reservedStackSize += context.XmmSaveRegionSize;
+
+ if (reservedStackSize >= StackGuardSize)
+ {
+ GenerateInlineStackProbe(context, reservedStackSize);
+ }
+
+ if (reservedStackSize != 0)
+ {
+ context.Assembler.Sub(rsp, Const(reservedStackSize), OperandType.I64);
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.AllocStack, context.StreamOffset, stackOffsetOrAllocSize: reservedStackSize));
+ }
+
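+            // Callee-saved vector registers are saved at the top of the reserved
+            // stack area, growing downwards from the allocated size.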
+ int offset = reservedStackSize;
+
+ mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ offset -= 16;
+
+ Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset);
+
+ context.Assembler.Movdqu(memOp, Xmm((X86Register)bit));
+
+ pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.SaveXmm128, context.StreamOffset, bit, offset));
+
+ mask &= ~(1 << bit);
+ }
+
+ return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset);
+ }
+
+ private static void WriteEpilogue(CodeGenContext context)
+ {
+ Operand rsp = Register(X86Register.Rsp);
+
+ int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize;
+
+ reservedStackSize += context.XmmSaveRegionSize;
+
+ int offset = reservedStackSize;
+
+ int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters;
+
+ while (mask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(mask);
+
+ offset -= 16;
+
+ Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset);
+
+ context.Assembler.Movdqu(Xmm((X86Register)bit), memOp);
+
+ mask &= ~(1 << bit);
+ }
+
+ if (reservedStackSize != 0)
+ {
+ context.Assembler.Add(rsp, Const(reservedStackSize), OperandType.I64);
+ }
+
+ mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters;
+
+ while (mask != 0)
+ {
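+                // Registers were pushed from the lowest index upwards, so pop
+                // them in reverse order, starting from the highest set bit.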
+ int bit = BitUtils.HighestBitSet(mask);
+
+ context.Assembler.Pop(Register((X86Register)bit));
+
+ mask &= ~(1 << bit);
+ }
+ }
+
+ private static void GenerateInlineStackProbe(CodeGenContext context, int size)
+ {
+            // Windows allocates stack pages lazily, and there are only two guard
+            // pages at the end of the stack. So, if the amount we reserve exceeds
+            // this guard size, we must make sure the OS maps all the pages we are
+            // going to use. We do that by performing a dummy read on each of those
+            // pages, forcing a page fault so the OS maps them. If a page is
+            // already mapped, nothing happens.
+ const int pageMask = PageSize - 1;
+
+ size = (size + pageMask) & ~pageMask;
+
+ Operand rsp = Register(X86Register.Rsp);
+ Operand temp = Register(CallingConvention.GetIntReturnRegister());
+
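+            // Touch one dword in each page below RSP so the OS commits it; the
+            // return register is dead this early in the prologue, so it is a
+            // safe scratch destination for the dummy loads.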
+ for (int offset = PageSize; offset < size; offset += PageSize)
+ {
+ Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, -offset);
+
+ context.Assembler.Mov(temp, memOp, OperandType.I32);
+ }
+ }
+
+ private static Operand Memory(Operand operand, OperandType type)
+ {
+ if (operand.Kind == OperandKind.Memory)
+ {
+ return operand;
+ }
+
+ return MemoryOp(type, operand);
+ }
+
+ private static Operand Register(X86Register register, OperandType type = OperandType.I64)
+ {
+ return Operand.Factory.Register((int)register, RegisterType.Integer, type);
+ }
+
+ private static Operand Xmm(X86Register register)
+ {
+ return Operand.Factory.Register((int)register, RegisterType.Vector, OperandType.V128);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
new file mode 100644
index 00000000..07cdcd09
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs
@@ -0,0 +1,144 @@
+using Ryujinx.Memory;
+using System;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class HardwareCapabilities
+ {
+ private delegate uint GetXcr0();
+
+ static HardwareCapabilities()
+ {
+ if (!X86Base.IsSupported)
+ {
+ return;
+ }
+
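+            // Leaf 0 reports the highest supported standard CPUID leaf in EAX;
+            // leaf 7 can only be queried when it is reported as available.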
+ (int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000);
+
+ (_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000);
+ FeatureInfo1Edx = (FeatureFlags1Edx)edx1;
+ FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1;
+
+ if (maxNum >= 7)
+ {
+ (_, int ebx7, int ecx7, _) = X86Base.CpuId(0x00000007, 0x00000000);
+ FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7;
+ FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7;
+ }
+
+ Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax();
+ }
+
+ private static uint GetXcr0Eax()
+ {
+ if (!FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave))
+ {
+                // The XSAVE feature is required for XGETBV.
+ return 0;
+ }
+
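+            // There is no managed XGETBV intrinsic available here, so emit a tiny
+            // executable stub at runtime: with ECX = 0, XGETBV reads XCR0 into
+            // EDX:EAX, and only the low 32 bits in EAX are needed.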
+ ReadOnlySpan<byte> asmGetXcr0 = new byte[]
+ {
+ 0x31, 0xc9, // xor ecx, ecx
+                0x0f, 0x01, 0xd0, // xgetbv
+ 0xc3, // ret
+ };
+
+ using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length);
+
+ memGetXcr0.Write(0, asmGetXcr0);
+
+ memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute);
+
+ var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer);
+
+ return fGetXcr0();
+ }
+
+ [Flags]
+ public enum FeatureFlags1Edx
+ {
+ Sse = 1 << 25,
+ Sse2 = 1 << 26
+ }
+
+ [Flags]
+ public enum FeatureFlags1Ecx
+ {
+ Sse3 = 1 << 0,
+ Pclmulqdq = 1 << 1,
+ Ssse3 = 1 << 9,
+ Fma = 1 << 12,
+ Sse41 = 1 << 19,
+ Sse42 = 1 << 20,
+ Popcnt = 1 << 23,
+ Aes = 1 << 25,
+ Xsave = 1 << 26,
+ Osxsave = 1 << 27,
+ Avx = 1 << 28,
+ F16c = 1 << 29
+ }
+
+ [Flags]
+ public enum FeatureFlags7Ebx
+ {
+ Avx2 = 1 << 5,
+ Avx512f = 1 << 16,
+ Avx512dq = 1 << 17,
+ Sha = 1 << 29,
+ Avx512bw = 1 << 30,
+ Avx512vl = 1 << 31
+ }
+
+ [Flags]
+ public enum FeatureFlags7Ecx
+ {
+ Gfni = 1 << 8,
+ }
+
+ [Flags]
+ public enum Xcr0FlagsEax
+ {
+ Sse = 1 << 1,
+ YmmHi128 = 1 << 2,
+ Opmask = 1 << 5,
+ ZmmHi256 = 1 << 6,
+ Hi16Zmm = 1 << 7
+ }
+
+ public static FeatureFlags1Edx FeatureInfo1Edx { get; }
+ public static FeatureFlags1Ecx FeatureInfo1Ecx { get; }
+ public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0;
+ public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0;
+ public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0;
+
+ public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse);
+ public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2);
+ public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3);
+ public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq);
+ public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3);
+ public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma);
+ public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41);
+ public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42);
+ public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt);
+ public static bool SupportsAesni => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes);
+ public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128);
+ public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx;
+ public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave)
+ && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm);
+ public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F;
+ public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F;
+ public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F;
+ public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c);
+ public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha);
+ public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni);
+
+ public static bool ForceLegacySse { get; set; }
+
+ public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse;
+ public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse;
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
new file mode 100644
index 00000000..302bf4d3
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs
@@ -0,0 +1,14 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ readonly struct IntrinsicInfo
+ {
+ public X86Instruction Inst { get; }
+ public IntrinsicType Type { get; }
+
+ public IntrinsicInfo(X86Instruction inst, IntrinsicType type)
+ {
+ Inst = inst;
+ Type = type;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
new file mode 100644
index 00000000..e3d94b7a
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs
@@ -0,0 +1,200 @@
+using ARMeilleure.Common;
+using ARMeilleure.IntermediateRepresentation;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class IntrinsicTable
+ {
+ private static IntrinsicInfo[] _intrinTable;
+
+ static IntrinsicTable()
+ {
+ _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))];
+
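+            // The table is indexed directly by the Intrinsic enum value, so an
+            // entry must be registered below for every x86 intrinsic in use.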
+ Add(Intrinsic.X86Addpd, new IntrinsicInfo(X86Instruction.Addpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesdec, new IntrinsicInfo(X86Instruction.Aesdec, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesdeclast, new IntrinsicInfo(X86Instruction.Aesdeclast, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesenc, new IntrinsicInfo(X86Instruction.Aesenc, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesenclast, new IntrinsicInfo(X86Instruction.Aesenclast, IntrinsicType.Binary));
+ Add(Intrinsic.X86Aesimc, new IntrinsicInfo(X86Instruction.Aesimc, IntrinsicType.Unary));
+ Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andpd, new IntrinsicInfo(X86Instruction.Andpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Andps, new IntrinsicInfo(X86Instruction.Andps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Blendvpd, new IntrinsicInfo(X86Instruction.Blendvpd, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Blendvps, new IntrinsicInfo(X86Instruction.Blendvps, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Cmppd, new IntrinsicInfo(X86Instruction.Cmppd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpps, new IntrinsicInfo(X86Instruction.Cmpps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpsd, new IntrinsicInfo(X86Instruction.Cmpsd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Cmpss, new IntrinsicInfo(X86Instruction.Cmpss, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Comisdeq, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisdge, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisdlt, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_));
+ Add(Intrinsic.X86Crc32, new IntrinsicInfo(X86Instruction.Crc32, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Crc32_16, new IntrinsicInfo(X86Instruction.Crc32_16, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Crc32_8, new IntrinsicInfo(X86Instruction.Crc32_8, IntrinsicType.Crc32));
+ Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtpd2ps, new IntrinsicInfo(X86Instruction.Cvtpd2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtps2dq, new IntrinsicInfo(X86Instruction.Cvtps2dq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Cvtsi2sd, new IntrinsicInfo(X86Instruction.Cvtsi2sd, IntrinsicType.BinaryGpr));
+ Add(Intrinsic.X86Cvtsi2si, new IntrinsicInfo(X86Instruction.Movd, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Cvtsi2ss, new IntrinsicInfo(X86Instruction.Cvtsi2ss, IntrinsicType.BinaryGpr));
+ Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Cvtss2si, new IntrinsicInfo(X86Instruction.Cvtss2si, IntrinsicType.UnaryToGpr));
+ Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divsd, new IntrinsicInfo(X86Instruction.Divsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Gf2p8affineqb, new IntrinsicInfo(X86Instruction.Gf2p8affineqb, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Insertps, new IntrinsicInfo(X86Instruction.Insertps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Ldmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
+ Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Maxss, new IntrinsicInfo(X86Instruction.Maxss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minpd, new IntrinsicInfo(X86Instruction.Minpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minps, new IntrinsicInfo(X86Instruction.Minps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minsd, new IntrinsicInfo(X86Instruction.Minsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Minss, new IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Movss, new IntrinsicInfo(X86Instruction.Movss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Palignr, new IntrinsicInfo(X86Instruction.Palignr, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pavgw, new IntrinsicInfo(X86Instruction.Pavgw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pblendvb, new IntrinsicInfo(X86Instruction.Pblendvb, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Pclmulqdq, new IntrinsicInfo(X86Instruction.Pclmulqdq, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Pcmpeqb, new IntrinsicInfo(X86Instruction.Pcmpeqb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqd, new IntrinsicInfo(X86Instruction.Pcmpeqd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqq, new IntrinsicInfo(X86Instruction.Pcmpeqq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpeqw, new IntrinsicInfo(X86Instruction.Pcmpeqw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtb, new IntrinsicInfo(X86Instruction.Pcmpgtb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtd, new IntrinsicInfo(X86Instruction.Pcmpgtd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtq, new IntrinsicInfo(X86Instruction.Pcmpgtq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pcmpgtw, new IntrinsicInfo(X86Instruction.Pcmpgtw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsb, new IntrinsicInfo(X86Instruction.Pmaxsb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsd, new IntrinsicInfo(X86Instruction.Pmaxsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxsw, new IntrinsicInfo(X86Instruction.Pmaxsw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxub, new IntrinsicInfo(X86Instruction.Pmaxub, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxud, new IntrinsicInfo(X86Instruction.Pmaxud, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmaxuw, new IntrinsicInfo(X86Instruction.Pmaxuw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsb, new IntrinsicInfo(X86Instruction.Pminsb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsd, new IntrinsicInfo(X86Instruction.Pminsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminsw, new IntrinsicInfo(X86Instruction.Pminsw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminub, new IntrinsicInfo(X86Instruction.Pminub, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminud, new IntrinsicInfo(X86Instruction.Pminud, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pminuw, new IntrinsicInfo(X86Instruction.Pminuw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmovsxbw, new IntrinsicInfo(X86Instruction.Pmovsxbw, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovsxdq, new IntrinsicInfo(X86Instruction.Pmovsxdq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovsxwd, new IntrinsicInfo(X86Instruction.Pmovsxwd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxbw, new IntrinsicInfo(X86Instruction.Pmovzxbw, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxdq, new IntrinsicInfo(X86Instruction.Pmovzxdq, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmovzxwd, new IntrinsicInfo(X86Instruction.Pmovzxwd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Pmulld, new IntrinsicInfo(X86Instruction.Pmulld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pmullw, new IntrinsicInfo(X86Instruction.Pmullw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount));
+ Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pshufd, new IntrinsicInfo(X86Instruction.Pshufd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psllw, new IntrinsicInfo(X86Instruction.Psllw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrad, new IntrinsicInfo(X86Instruction.Psrad, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psraw, new IntrinsicInfo(X86Instruction.Psraw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrld, new IntrinsicInfo(X86Instruction.Psrld, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrlq, new IntrinsicInfo(X86Instruction.Psrlq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrldq, new IntrinsicInfo(X86Instruction.Psrldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psrlw, new IntrinsicInfo(X86Instruction.Psrlw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubb, new IntrinsicInfo(X86Instruction.Psubb, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubd, new IntrinsicInfo(X86Instruction.Psubd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubq, new IntrinsicInfo(X86Instruction.Psubq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Psubw, new IntrinsicInfo(X86Instruction.Psubw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhbw, new IntrinsicInfo(X86Instruction.Punpckhbw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhdq, new IntrinsicInfo(X86Instruction.Punpckhdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckhwd, new IntrinsicInfo(X86Instruction.Punpckhwd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklbw, new IntrinsicInfo(X86Instruction.Punpcklbw, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpckldq, new IntrinsicInfo(X86Instruction.Punpckldq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary));
+ Add(Intrinsic.X86Punpcklwd, new IntrinsicInfo(X86Instruction.Punpcklwd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Pxor, new IntrinsicInfo(X86Instruction.Pxor, IntrinsicType.Binary));
+ Add(Intrinsic.X86Rcpps, new IntrinsicInfo(X86Instruction.Rcpps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Rcpss, new IntrinsicInfo(X86Instruction.Rcpss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Roundpd, new IntrinsicInfo(X86Instruction.Roundpd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundps, new IntrinsicInfo(X86Instruction.Roundps, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundsd, new IntrinsicInfo(X86Instruction.Roundsd, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sha256Msg1, new IntrinsicInfo(X86Instruction.Sha256Msg1, IntrinsicType.Binary));
+ Add(Intrinsic.X86Sha256Msg2, new IntrinsicInfo(X86Instruction.Sha256Msg2, IntrinsicType.Binary));
+ Add(Intrinsic.X86Sha256Rnds2, new IntrinsicInfo(X86Instruction.Sha256Rnds2, IntrinsicType.Ternary));
+ Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtps, new IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary));
+ Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary));
+ Add(Intrinsic.X86Stmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr));
+ Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Subss, new IntrinsicInfo(X86Instruction.Subss, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpckhpd, new IntrinsicInfo(X86Instruction.Unpckhpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary));
+ Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary));
+ Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm));
+ Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231pd, new IntrinsicInfo(X86Instruction.Vfnmadd231pd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma));
+ Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm));
+ Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary));
+ Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary));
+ }
+
+ private static void Add(Intrinsic intrin, IntrinsicInfo info)
+ {
+ _intrinTable[(int)intrin] = info;
+ }
+
+ public static IntrinsicInfo GetInfo(Intrinsic intrin)
+ {
+ return _intrinTable[(int)intrin];
+ }
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs
new file mode 100644
index 00000000..5a9c14af
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs
@@ -0,0 +1,18 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum IntrinsicType
+ {
+ Comis_,
+ Mxcsr,
+ PopCount,
+ Unary,
+ UnaryToGpr,
+ Binary,
+ BinaryGpr,
+ BinaryImm,
+ Crc32,
+ Ternary,
+ TernaryImm,
+ Fma
+ }
+} \ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/Mxcsr.cs b/src/ARMeilleure/CodeGen/X86/Mxcsr.cs
new file mode 100644
index 00000000..c61eac31
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/Mxcsr.cs
@@ -0,0 +1,15 @@
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ [Flags]
+ enum Mxcsr
+ {
+ Ftz = 1 << 15, // Flush To Zero.
+ Rhi = 1 << 14, // Round Mode high bit.
+ Rlo = 1 << 13, // Round Mode low bit.
+ Um = 1 << 11, // Underflow Mask.
+ Dm = 1 << 8, // Denormal Mask.
+ Daz = 1 << 6 // Denormals Are Zero.
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocator.cs b/src/ARMeilleure/CodeGen/X86/PreAllocator.cs
new file mode 100644
index 00000000..cb742d67
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocator.cs
@@ -0,0 +1,796 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocator
+ {
+ public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs)
+ {
+ maxCallArgs = -1;
+
+ Span<Operation> buffer = default;
+
+ CallConvName callConv = CallingConvention.GetCurrentCallConv();
+
+ Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()];
+
+ for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ if (node.Instruction == Instruction.Phi)
+ {
+ continue;
+ }
+
+ InsertConstantRegCopies(block.Operations, node);
+ InsertDestructiveRegCopies(block.Operations, node);
+ InsertConstrainedRegCopies(block.Operations, node);
+
+ switch (node.Instruction)
+ {
+ case Instruction.Call:
+                            // Get the maximum number of arguments used on a call.
+                            // On Windows, when a struct is returned from the call,
+                            // we also need to pass a pointer to where the struct
+                            // should be written as the first argument.
+ int argsCount = node.SourcesCount - 1;
+
+ if (node.Destination != default && node.Destination.Type == OperandType.V128)
+ {
+ argsCount++;
+ }
+
+ if (maxCallArgs < argsCount)
+ {
+ maxCallArgs = argsCount;
+ }
+
+ // Copy values to registers expected by the function
+ // being called, as mandated by the ABI.
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertCallCopies(block.Operations, stackAlloc, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ PreAllocatorSystemV.InsertCallCopies(block.Operations, node);
+ }
+ break;
+
+ case Instruction.ConvertToFPUI:
+ GenerateConvertToFPUI(block.Operations, node);
+ break;
+
+ case Instruction.LoadArgument:
+ if (callConv == CallConvName.Windows)
+ {
+ nextNode = PreAllocatorWindows.InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ nextNode = PreAllocatorSystemV.InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node);
+ }
+ break;
+
+ case Instruction.Negate:
+ if (!node.GetSource(0).Type.IsInteger())
+ {
+ GenerateNegate(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Return:
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertReturnCopy(cctx, block.Operations, preservedArgs, node);
+ }
+ else /* if (callConv == CallConvName.SystemV) */
+ {
+ PreAllocatorSystemV.InsertReturnCopy(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Tailcall:
+ if (callConv == CallConvName.Windows)
+ {
+ PreAllocatorWindows.InsertTailcallCopies(block.Operations, stackAlloc, node);
+ }
+ else
+ {
+ PreAllocatorSystemV.InsertTailcallCopies(block.Operations, stackAlloc, node);
+ }
+ break;
+
+ case Instruction.VectorInsert8:
+ if (!HardwareCapabilities.SupportsSse41)
+ {
+ GenerateVectorInsert8(block.Operations, node);
+ }
+ break;
+
+ case Instruction.Extended:
+ if (node.Intrinsic == Intrinsic.X86Ldmxcsr)
+ {
+ int stackOffset = stackAlloc.Allocate(OperandType.I32);
+
+ node.SetSources(new Operand[] { Const(stackOffset), node.GetSource(0) });
+ }
+ else if (node.Intrinsic == Intrinsic.X86Stmxcsr)
+ {
+ int stackOffset = stackAlloc.Allocate(OperandType.I32);
+
+ node.SetSources(new Operand[] { Const(stackOffset) });
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ protected static void InsertConstantRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0 || IsXmmIntrinsic(node))
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand src1 = node.GetSource(0);
+ Operand src2;
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ if (!src1.Type.IsInteger())
+ {
+ // Handle non-integer types (FP32, FP64 and V128).
+ // For instructions without an immediate operand, we do the following:
+ // - Insert a copy with the constant value (as integer) to a GPR.
+ // - Insert a copy from the GPR to a XMM register.
+ // - Replace the constant use with the XMM register.
+ src1 = AddXmmCopy(nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ else if (!HasConstSrc1(inst))
+                // Handle integer types.
+                // Most ALU instructions accept a 32-bit immediate on the second operand.
+                // We need to ensure the following:
+                // - If the constant is on operand 1, we need to move it.
+                // -- But first, we try to swap operands 1 and 2 if the instruction is commutative.
+                // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy.
+                // - If the constant is on operand 2, we check if the instruction supports it;
+                // if not, we also add a copy. 64-bit constants are usually not supported.
+ if (IsCommutative(node))
+ {
+ src2 = node.GetSource(1);
+
+ Operand temp = src1;
+
+ src1 = src2;
+ src2 = temp;
+
+ node.SetSource(0, src1);
+ node.SetSource(1, src2);
+ }
+
+ if (src1.Kind == OperandKind.Constant)
+ {
+ src1 = AddCopy(nodes, node, src1);
+
+ node.SetSource(0, src1);
+ }
+ }
+ }
+
+ if (node.SourcesCount < 2)
+ {
+ return;
+ }
+
+ src2 = node.GetSource(1);
+
+ if (src2.Kind == OperandKind.Constant)
+ {
+ if (!src2.Type.IsInteger())
+ {
+ src2 = AddXmmCopy(nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ else if (!HasConstSrc2(inst) || CodeGenCommon.IsLongConst(src2))
+ {
+ src2 = AddCopy(nodes, node, src2);
+
+ node.SetSource(1, src2);
+ }
+ }
+ }
+
+ protected static void InsertConstrainedRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ switch (node.Instruction)
+ {
+ case Instruction.CompareAndSwap:
+ case Instruction.CompareAndSwap16:
+ case Instruction.CompareAndSwap8:
+ {
+ OperandType type = node.GetSource(1).Type;
+
+ if (type == OperandType.V128)
+ {
+ // Handle the many restrictions of the compare and exchange (16 bytes) instruction:
+ // - The expected value should be in RDX:RAX.
+ // - The new value to be written should be in RCX:RBX.
+ // - The value at the memory location is loaded to RDX:RAX.
+ void SplitOperand(Operand source, Operand lr, Operand hr)
+ {
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, lr, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, hr, source, Const(1)));
+ }
+
+ Operand rax = Gpr(X86Register.Rax, OperandType.I64);
+ Operand rbx = Gpr(X86Register.Rbx, OperandType.I64);
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I64);
+ Operand rdx = Gpr(X86Register.Rdx, OperandType.I64);
+
+ SplitOperand(node.GetSource(1), rax, rdx);
+ SplitOperand(node.GetSource(2), rbx, rcx);
+
+ Operation operation = node;
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, rax));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1)));
+
+ operation.SetDestinations(new Operand[] { rdx, rax });
+ operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx });
+ }
+ else
+ {
+ // Handle the many restrictions of the compare and exchange (32/64) instruction:
+ // - The expected value should be in (E/R)AX.
+ // - The value at the memory location is loaded to (E/R)AX.
+ Operand expected = node.GetSource(1);
+ Operand newValue = node.GetSource(2);
+
+ Operand rax = Gpr(X86Register.Rax, expected.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, expected));
+
+ // We need to store the new value into a temp, since it may
+ // be a constant, and this instruction does not support immediate operands.
+ Operand temp = Local(newValue.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, newValue));
+
+ node.SetSources(new Operand[] { node.GetSource(0), rax, temp });
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax));
+
+ node.Destination = rax;
+ }
+
+ break;
+ }
+
+ case Instruction.Divide:
+ case Instruction.DivideUI:
+ {
+ // Handle the many restrictions of the division instructions:
+ // - The dividend is always in RDX:RAX.
+ // - The result is always in RAX.
+ // - Additionally it also writes the remainder in RDX.
+ if (dest.Type.IsInteger())
+ {
+ Operand src1 = node.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, src1));
+ nodes.AddBefore(node, Operation(Instruction.Clobber, rdx));
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax));
+
+ node.SetSources(new Operand[] { rdx, rax, node.GetSource(1) });
+ node.Destination = rax;
+ }
+
+ break;
+ }
+
+ case Instruction.Extended:
+ {
+ bool isBlend = node.Intrinsic == Intrinsic.X86Blendvpd ||
+ node.Intrinsic == Intrinsic.X86Blendvps ||
+ node.Intrinsic == Intrinsic.X86Pblendvb;
+
+                    // The last operand of BLENDVPD, BLENDVPS and PBLENDVB is always implied to be XMM0 when VEX is not supported.
+                    // SHA256RNDS2 always has an implied XMM0 as its last operand.
+ if ((isBlend && !HardwareCapabilities.SupportsVexEncoding) || node.Intrinsic == Intrinsic.X86Sha256Rnds2)
+ {
+ Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, xmm0, node.GetSource(2)));
+
+ node.SetSource(2, xmm0);
+ }
+
+ break;
+ }
+
+ case Instruction.Multiply64HighSI:
+ case Instruction.Multiply64HighUI:
+ {
+ // Handle the many restrictions of the i64 * i64 = i128 multiply instructions:
+ // - The multiplicand is always in RAX.
+ // - The lower 64-bits of the result is always in RAX.
+ // - The higher 64-bits of the result is always in RDX.
+ Operand src1 = node.GetSource(0);
+
+ Operand rax = Gpr(X86Register.Rax, src1.Type);
+ Operand rdx = Gpr(X86Register.Rdx, src1.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rax, src1));
+
+ node.SetSource(0, rax);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, rdx));
+
+ node.SetDestinations(new Operand[] { rdx, rax });
+
+ break;
+ }
+
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ {
+ // The shift register is always implied to be CL (low 8-bits of RCX or ECX).
+ if (node.GetSource(1).Kind == OperandKind.LocalVariable)
+ {
+ Operand rcx = Gpr(X86Register.Rcx, OperandType.I32);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, rcx, node.GetSource(1)));
+
+ node.SetSource(1, rcx);
+ }
+
+ break;
+ }
+ }
+ }
+
+ protected static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.Destination == default || node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Instruction inst = node.Instruction;
+
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0);
+
+            // The multiply instruction (which maps to IMUL) is somewhat special:
+            // it has a three-operand form where the second source is an immediate value.
+ bool threeOperandForm = inst == Instruction.Multiply && node.GetSource(1).Kind == OperandKind.Constant;
+
+ if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm)
+ {
+ bool useNewLocal = false;
+
+ for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++)
+ {
+ if (node.GetSource(srcIndex) == dest)
+ {
+ useNewLocal = true;
+
+ break;
+ }
+ }
+
+ if (useNewLocal)
+ {
+                    // Dest is already being used as a source, so we need a new
+                    // local to store the temporary value; otherwise the value in
+                    // the dest local would be overwritten.
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1));
+
+ node.SetSource(0, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1));
+
+ node.SetSource(0, dest);
+ }
+ }
+ else if (inst == Instruction.ConditionalSelect)
+ {
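+                // CMOVcc is destructive: the destination is tied to the last
+                // source (src3), which is loaded first and then conditionally
+                // overwritten by the move.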
+ Operand src2 = node.GetSource(1);
+ Operand src3 = node.GetSource(2);
+
+ if (src1 == dest || src2 == dest)
+ {
+ Operand temp = Local(dest.Type);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, temp, src3));
+
+ node.SetSource(2, temp);
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp));
+
+ node.Destination = temp;
+ }
+ else
+ {
+ nodes.AddBefore(node, Operation(Instruction.Copy, dest, src3));
+
+ node.SetSource(2, dest);
+ }
+ }
+ }
+
+ private static void GenerateConvertToFPUI(IntrusiveList<Operation> nodes, Operation node)
+ {
+ // Unsigned integer to FP conversions are not supported on X86.
+ // We need to turn them into signed integer to FP conversions, and
+ // adjust the final result.
+ Operand dest = node.Destination;
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\".");
+
+ Operation currentNode = node;
+
+ if (source.Type == OperandType.I32)
+ {
+                // For 32-bit integers, we can just zero-extend to 64 bits,
+                // and then use the 64-bit signed conversion instructions.
+ Operand zex = Local(OperandType.I64);
+
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend32, zex, source));
+ node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, dest, zex));
+ }
+ else /* if (source.Type == OperandType.I64) */
+ {
+                // For 64-bit integers, we need to do the following:
+ // - Ensure that the integer has the most significant bit clear.
+ // -- This can be done by shifting the value right by 1, that is, dividing by 2.
+ // -- The least significant bit is lost in this case though.
+ // - We can then convert the shifted value with a signed integer instruction.
+ // - The result still needs to be corrected after that.
+ // -- First, we need to multiply the result by 2, as we divided it by 2 before.
+ // --- This can be done efficiently by adding the result to itself.
+ // -- Then, we need to add the least significant bit that was shifted out.
+ // --- We can convert the least significant bit to float, and add it to the result.
+ Operand lsb = Local(OperandType.I64);
+ Operand half = Local(OperandType.I64);
+
+ Operand lsbF = Local(dest.Type);
+
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, lsb, source));
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, half, source));
+
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L)));
+ node = nodes.AddAfter(node, Operation(Instruction.ShiftRightUI, half, half, Const(1)));
+
+ node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, lsbF, lsb));
+ node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, dest, half));
+
+ node = nodes.AddAfter(node, Operation(Instruction.Add, dest, dest, dest));
+ nodes.AddAfter(node, Operation(Instruction.Add, dest, dest, lsbF));
+ }
+
+ Delete(nodes, currentNode);
+ }
+
+ private static void GenerateNegate(IntrusiveList<Operation> nodes, Operation node)
+ {
+            // There is no SSE FP negate instruction, so we transform the negation
+            // into an XOR of the value with a mask that has only the sign bit set.
+            // This also produces -0 for a negation of the value 0.
+ Operand dest = node.Destination;
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(dest.Type == OperandType.FP32 ||
+ dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\".");
+
+ Operation currentNode = node;
+
+ Operand res = Local(dest.Type);
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorOne, res));
+
+ if (dest.Type == OperandType.FP32)
+ {
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Pslld, res, res, Const(31)));
+ }
+ else /* if (dest.Type == OperandType.FP64) */
+ {
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Psllq, res, res, Const(63)));
+ }
+
+ node = nodes.AddAfter(node, Operation(Intrinsic.X86Xorps, res, res, source));
+
+ nodes.AddAfter(node, Operation(Instruction.Copy, dest, res));
+
+ Delete(nodes, currentNode);
+ }
+
+ private static void GenerateVectorInsert8(IntrusiveList<Operation> nodes, Operation node)
+ {
+            // Handle vector insertion when SSE 4.1 is not supported.
+ Operand dest = node.Destination;
+ Operand src1 = node.GetSource(0); // Vector
+ Operand src2 = node.GetSource(1); // Value
+ Operand src3 = node.GetSource(2); // Index
+
+ Debug.Assert(src3.Kind == OperandKind.Constant);
+
+ byte index = src3.AsByte();
+
+ Debug.Assert(index < 16);
+
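+            // Lower the byte insert into a 16-bit extract/merge/insert sequence:
+            // pull out the word containing the byte, splice the new byte into
+            // the correct half, then write the word back with VectorInsert16.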
+ Operation currentNode = node;
+
+ Operand temp1 = Local(OperandType.I32);
+ Operand temp2 = Local(OperandType.I32);
+
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, temp2, src2));
+
+ Operation vextOp = Operation(Instruction.VectorExtract16, temp1, src1, Const(index >> 1));
+
+ node = nodes.AddAfter(node, vextOp);
+
+ if ((index & 1) != 0)
+ {
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend8, temp1, temp1));
+ node = nodes.AddAfter(node, Operation(Instruction.ShiftLeft, temp2, temp2, Const(8)));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+ else
+ {
+ node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend8, temp2, temp2));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseAnd, temp1, temp1, Const(0xff00)));
+ node = nodes.AddAfter(node, Operation(Instruction.BitwiseOr, temp1, temp1, temp2));
+ }
+
+ Operation vinsOp = Operation(Instruction.VectorInsert16, dest, src1, temp1, Const(index >> 1));
+
+ nodes.AddAfter(node, vinsOp);
+
+ Delete(nodes, currentNode);
+ }
+
+ protected static Operand AddXmmCopy(IntrusiveList<Operation> nodes, Operation node, Operand source)
+ {
+ Operand temp = Local(source.Type);
+ Operand intConst = AddCopy(nodes, node, GetIntConst(source));
+
+ Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ protected static Operand AddCopy(IntrusiveList<Operation> nodes, Operation node, Operand source)
+ {
+ Operand temp = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, temp, source);
+
+ nodes.AddBefore(node, copyOp);
+
+ return temp;
+ }
+
+ private static Operand GetIntConst(Operand value)
+ {
+ if (value.Type == OperandType.FP32)
+ {
+ return Const(value.AsInt32());
+ }
+ else if (value.Type == OperandType.FP64)
+ {
+ return Const(value.AsInt64());
+ }
+
+ return value;
+ }
+
+ protected static void Delete(IntrusiveList<Operation> nodes, Operation node)
+ {
+ node.Destination = default;
+
+ for (int index = 0; index < node.SourcesCount; index++)
+ {
+ node.SetSource(index, default);
+ }
+
+ nodes.Remove(node);
+ }
+
+ protected static Operand Gpr(X86Register register, OperandType type)
+ {
+ return Register((int)register, RegisterType.Integer, type);
+ }
+
+ protected static Operand Xmm(X86Register register, OperandType type)
+ {
+ return Register((int)register, RegisterType.Vector, type);
+ }
+
+ private static bool IsSameOperandDestSrc1(Operation operation)
+ {
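+            // Legacy (non-VEX) encodings are destructive: the destination also
+            // acts as the first source, so both must be assigned the same register.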
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();
+ case Instruction.Multiply:
+ case Instruction.Subtract:
+ return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger();
+
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseNot:
+ case Instruction.BitwiseOr:
+ case Instruction.ByteSwap:
+ case Instruction.Negate:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ return true;
+
+ case Instruction.Divide:
+ return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger();
+
+ case Instruction.VectorInsert:
+ case Instruction.VectorInsert16:
+ case Instruction.VectorInsert8:
+ return !HardwareCapabilities.SupportsVexEncoding;
+
+ case Instruction.Extended:
+ return IsIntrinsicSameOperandDestSrc1(operation);
+ }
+
+ return IsVexSameOperandDestSrc1(operation);
+ }
+
+ private static bool IsIntrinsicSameOperandDestSrc1(Operation operation)
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ return info.Type == IntrinsicType.Crc32 || info.Type == IntrinsicType.Fma || IsVexSameOperandDestSrc1(operation);
+ }
+
+ private static bool IsVexSameOperandDestSrc1(Operation operation)
+ {
+ if (IsIntrinsic(operation.Instruction))
+ {
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ bool hasVex = HardwareCapabilities.SupportsVexEncoding && Assembler.SupportsVexPrefix(info.Inst);
+
+ bool isUnary = operation.SourcesCount < 2;
+
+ bool hasVecDest = operation.Destination != default && operation.Destination.Type == OperandType.V128;
+
+ return !hasVex && !isUnary && hasVecDest;
+ }
+
+ return false;
+ }
+
+ private static bool HasConstSrc1(Instruction inst)
+ {
+ switch (inst)
+ {
+ case Instruction.Copy:
+ case Instruction.LoadArgument:
+ case Instruction.Spill:
+ case Instruction.SpillArg:
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool HasConstSrc2(Instruction inst)
+ {
+ switch (inst)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ case Instruction.Multiply:
+ case Instruction.RotateRight:
+ case Instruction.ShiftLeft:
+ case Instruction.ShiftRightSI:
+ case Instruction.ShiftRightUI:
+ case Instruction.Store:
+ case Instruction.Store16:
+ case Instruction.Store8:
+ case Instruction.Subtract:
+ case Instruction.VectorExtract:
+ case Instruction.VectorExtract16:
+ case Instruction.VectorExtract8:
+ return true;
+ }
+
+ return false;
+ }
+
+ private static bool IsCommutative(Operation operation)
+ {
+ switch (operation.Instruction)
+ {
+ case Instruction.Add:
+ case Instruction.BitwiseAnd:
+ case Instruction.BitwiseExclusiveOr:
+ case Instruction.BitwiseOr:
+ case Instruction.Multiply:
+ return true;
+
+ case Instruction.BranchIf:
+ case Instruction.Compare:
+ {
+ Operand comp = operation.GetSource(2);
+
+ Debug.Assert(comp.Kind == OperandKind.Constant);
+
+ var compType = (Comparison)comp.AsInt32();
+
+ return compType == Comparison.Equal || compType == Comparison.NotEqual;
+ }
+ }
+
+ return false;
+ }
+
+ private static bool IsIntrinsic(Instruction inst)
+ {
+ return inst == Instruction.Extended;
+ }
+
+ private static bool IsXmmIntrinsic(Operation operation)
+ {
+ if (operation.Instruction != Instruction.Extended)
+ {
+ return false;
+ }
+
+ IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic);
+
+ return info.Type != IntrinsicType.Crc32;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs b/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs
new file mode 100644
index 00000000..a84d5050
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs
@@ -0,0 +1,334 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocatorSystemV : PreAllocator
+ {
+ public static void InsertCallCopies(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ List<Operand> sources = new List<Operand>
+ {
+ node.GetSource(0)
+ };
+
+ int argsCount = node.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ int stackOffset = 0;
+
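+ // Classify each argument: integer arguments take GPRs while any remain,
+ // vector arguments take XMM registers, and a V128 needs two consecutive
+ // GPR slots. Whatever does not fit in registers is spilled to the stack.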
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < intMax;
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ Operand offset = Const(stackOffset);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp));
+
+ stackOffset += source.Type.GetSizeInBytes();
+ }
+ }
+
+ node.SetSources(sources.ToArray());
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
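+ // A V128 return value comes back split across the two integer return
+ // registers (RAX and RDX on SysV); rebuild the vector from both halves.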
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ Operation operation = node;
+
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg));
+ nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1)));
+
+ operation.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ node.Destination = retReg;
+ }
+ }
+ }
+
+ public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
+ {
+ List<Operand> sources = new List<Operand>
+ {
+ node.GetSource(0)
+ };
+
+ int argsCount = node.SourcesCount - 1;
+
+ int intMax = CallingConvention.GetIntArgumentsOnRegsCount();
+ int vecMax = CallingConvention.GetVecArgumentsOnRegsCount();
+
+ int intCount = 0;
+ int vecCount = 0;
+
+ // Handle arguments passed in registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(1 + index);
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount + 1 < intMax;
+ }
+ else
+ {
+ passOnReg = vecCount < vecMax;
+ }
+
+ if (source.Type == OperandType.V128 && passOnReg)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+ Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1)));
+
+ continue;
+ }
+
+ if (passOnReg)
+ {
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources.Add(argReg);
+ }
+ else
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+ }
+
+ // The target address must be in the return register, since we
+ // don't return anything and it is guaranteed not to be a
+ // callee-saved register (which would be trashed in the epilogue).
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = retReg;
+
+ node.SetSources(sources.ToArray());
+ }
+
+ public static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int index = source.AsInt32();
+
+ int intCount = 0;
+ int vecCount = 0;
+
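+ // Count the register slots consumed by the arguments that precede this
+ // one; a V128 argument occupies two integer slots.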
+ for (int cIndex = 0; cIndex < index; cIndex++)
+ {
+ OperandType argType = cctx.FuncArgTypes[cIndex];
+
+ if (argType.IsInteger())
+ {
+ intCount++;
+ }
+ else if (argType == OperandType.V128)
+ {
+ intCount += 2;
+ }
+ else
+ {
+ vecCount++;
+ }
+ }
+
+ bool passOnReg;
+
+ if (source.Type.IsInteger())
+ {
+ passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount();
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount();
+ }
+ else
+ {
+ passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount();
+ }
+
+ if (passOnReg)
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ // V128 is a struct; we pass each half in a GPR if possible.
+ Operand pArg = Local(OperandType.V128);
+
+ Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64);
+ Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64);
+
+ Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg);
+ Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1));
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyH);
+ cctx.Cfg.Entry.Operations.AddFirst(copyL);
+
+ preservedArgs[index] = pArg;
+ }
+ else
+ {
+ Operand pArg = Local(dest.Type);
+
+ Operand argReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+ }
+
+ Operation nextNode;
+
+ if (dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]);
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ public static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
+ }
+ else
+ {
+ Operand retReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), source.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs b/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs
new file mode 100644
index 00000000..45319e6a
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs
@@ -0,0 +1,327 @@
+using ARMeilleure.CodeGen.RegisterAllocators;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System;
+using System.Diagnostics;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ class PreAllocatorWindows : PreAllocator
+ {
+ public static void InsertCallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
+ {
+ Operand dest = node.Destination;
+
+ // Handle struct arguments.
+ int retArgs = 0;
+ int stackAllocOffset = 0;
+
+ int AllocateOnStack(int size)
+ {
+ // We assume that the stack allocator is initially empty (TotalSize = 0).
+ // Taking that into account, we can reuse the space allocated for other
+ // calls by keeping track of our own allocated size (stackAllocOffset).
+ // If the space allocated is not big enough, then we just expand it.
+ int offset = stackAllocOffset;
+
+ if (stackAllocOffset + size > stackAlloc.TotalSize)
+ {
+ stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize);
+ }
+
+ stackAllocOffset += size;
+
+ return offset;
+ }
+
+ Operand arg0Reg = default;
+
+ if (dest != default && dest.Type == OperandType.V128)
+ {
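+ // On Windows x64 a V128 does not fit in the return register, so the value
+ // is returned through a caller-allocated buffer whose address is passed
+ // as a hidden first argument.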
+ int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes());
+
+ arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation allocOp = Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset));
+
+ nodes.AddBefore(node, allocOp);
+
+ retArgs = 1;
+ }
+
+ int argsCount = node.SourcesCount - 1;
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs;
+
+ if (argsCount > maxArgs)
+ {
+ argsCount = maxArgs;
+ }
+
+ Operand[] sources = new Operand[1 + retArgs + argsCount];
+
+ sources[0] = node.GetSource(0);
+
+ if (arg0Reg != default)
+ {
+ sources[1] = arg0Reg;
+ }
+
+ for (int index = 1; index < node.SourcesCount; index++)
+ {
+ Operand source = node.GetSource(index);
+
+ if (source.Type == OperandType.V128)
+ {
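+ // Windows x64 passes vector types larger than 64 bits by reference:
+ // store the value to caller-allocated stack space and pass its address.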
+ Operand stackAddr = Local(OperandType.I64);
+
+ int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes());
+
+ nodes.AddBefore(node, Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset)));
+
+ Operation storeOp = Operation(Instruction.Store, default, stackAddr, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, storeOp));
+
+ node.SetSource(index, stackAddr);
+ }
+ }
+
+ // Handle arguments passed in registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+ Operand argReg;
+
+ int argIndex = index + retArgs;
+
+ if (source.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type);
+ }
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources[1 + retArgs + index] = argReg;
+ }
+
+ // The remaining arguments (those that are not passed in registers)
+ // are passed on the stack; we write them out with "SpillArg".
+ for (int index = argsCount; index < node.SourcesCount - 1; index++)
+ {
+ Operand source = node.GetSource(index + 1);
+ Operand offset = Const((index + retArgs) * 8);
+
+ Operation spillOp = Operation(Instruction.SpillArg, default, offset, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp));
+ }
+
+ if (dest != default)
+ {
+ if (dest.Type == OperandType.V128)
+ {
+ Operand retValueAddr = Local(OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.Copy, retValueAddr, arg0Reg));
+
+ Operation loadOp = Operation(Instruction.Load, dest, retValueAddr);
+
+ nodes.AddAfter(node, loadOp);
+
+ node.Destination = default;
+ }
+ else
+ {
+ Operand retReg = dest.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type)
+ : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, dest, retReg);
+
+ nodes.AddAfter(node, copyOp);
+
+ node.Destination = retReg;
+ }
+ }
+
+ node.SetSources(sources);
+ }
+
+ public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node)
+ {
+ int argsCount = node.SourcesCount - 1;
+ int maxArgs = CallingConvention.GetArgumentsOnRegsCount();
+
+ if (argsCount > maxArgs)
+ {
+ throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)");
+ }
+
+ Operand[] sources = new Operand[1 + argsCount];
+
+ // Handle arguments passed in registers.
+ for (int index = 0; index < argsCount; index++)
+ {
+ Operand source = node.GetSource(1 + index);
+ Operand argReg = source.Type.IsInteger()
+ ? Gpr(CallingConvention.GetIntArgumentRegister(index), source.Type)
+ : Xmm(CallingConvention.GetVecArgumentRegister(index), source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, argReg, source);
+
+ InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp));
+
+ sources[1 + index] = argReg;
+ }
+
+ // The target address must be in the return register, since we
+ // don't return anything and it is guaranteed not to be a
+ // callee-saved register (which would be trashed in the epilogue).
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = retReg;
+
+ node.SetSources(sources);
+ }
+
+ public static Operation InsertLoadArgumentCopy(
+ CompilerContext cctx,
+ ref Span<Operation> buffer,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ Operand source = node.GetSource(0);
+
+ Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind.");
+
+ int retArgs = cctx.FuncReturnType == OperandType.V128 ? 1 : 0;
+
+ int index = source.AsInt32() + retArgs;
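+
+ // If the function returns a V128, the hidden return-buffer pointer
+ // occupies the first argument slot, so all argument indices shift by one.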
+
+ if (index < CallingConvention.GetArgumentsOnRegsCount())
+ {
+ Operand dest = node.Destination;
+
+ if (preservedArgs[index] == default)
+ {
+ Operand argReg, pArg;
+
+ if (dest.Type.IsInteger())
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type);
+ pArg = Local(dest.Type);
+ }
+ else if (dest.Type == OperandType.V128)
+ {
+ argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64);
+ pArg = Local(OperandType.I64);
+ }
+ else
+ {
+ argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type);
+ pArg = Local(dest.Type);
+ }
+
+ Operation copyOp = Operation(Instruction.Copy, pArg, argReg);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[index] = pArg;
+ }
+
+ Operation nextNode;
+
+ if (dest.Type != OperandType.V128 && dest.AssignmentsCount == 1)
+ {
+ // Let's propagate the argument if we can to avoid copies.
+ PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]);
+ nextNode = node.ListNext;
+ }
+ else
+ {
+ Operation argCopyOp = Operation(dest.Type == OperandType.V128
+ ? Instruction.Load
+ : Instruction.Copy, dest, preservedArgs[index]);
+
+ nextNode = nodes.AddBefore(node, argCopyOp);
+ }
+
+ Delete(nodes, node);
+ return nextNode;
+ }
+ else
+ {
+ // TODO: Pass on stack.
+ return node;
+ }
+ }
+
+ public static void InsertReturnCopy(
+ CompilerContext cctx,
+ IntrusiveList<Operation> nodes,
+ Operand[] preservedArgs,
+ Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+ Operand retReg;
+
+ if (source.Type.IsInteger())
+ {
+ retReg = Gpr(CallingConvention.GetIntReturnRegister(), source.Type);
+ }
+ else if (source.Type == OperandType.V128)
+ {
+ if (preservedArgs[0] == default)
+ {
+ Operand preservedArg = Local(OperandType.I64);
+ Operand arg0 = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64);
+
+ Operation copyOp = Operation(Instruction.Copy, preservedArg, arg0);
+
+ cctx.Cfg.Entry.Operations.AddFirst(copyOp);
+
+ preservedArgs[0] = preservedArg;
+ }
+
+ retReg = preservedArgs[0];
+ }
+ else
+ {
+ retReg = Xmm(CallingConvention.GetVecReturnRegister(), source.Type);
+ }
+
+ if (source.Type == OperandType.V128)
+ {
+ Operation retStoreOp = Operation(Instruction.Store, default, retReg, source);
+
+ nodes.AddBefore(node, retStoreOp);
+ }
+ else
+ {
+ Operation retCopyOp = Operation(Instruction.Copy, retReg, source);
+
+ nodes.AddBefore(node, retCopyOp);
+ }
+
+ node.SetSources(Array.Empty<Operand>());
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/X86Condition.cs b/src/ARMeilleure/CodeGen/X86/X86Condition.cs
new file mode 100644
index 00000000..c82cbdec
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Condition.cs
@@ -0,0 +1,47 @@
+using ARMeilleure.IntermediateRepresentation;
+using System;
+
+namespace ARMeilleure.CodeGen.X86
+{
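+ // Values match the 4-bit condition-code encodings used by the x86
+ // Jcc, SETcc and CMOVcc instructions.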
+ enum X86Condition
+ {
+ Overflow = 0x0,
+ NotOverflow = 0x1,
+ Below = 0x2,
+ AboveOrEqual = 0x3,
+ Equal = 0x4,
+ NotEqual = 0x5,
+ BelowOrEqual = 0x6,
+ Above = 0x7,
+ Sign = 0x8,
+ NotSign = 0x9,
+ ParityEven = 0xa,
+ ParityOdd = 0xb,
+ Less = 0xc,
+ GreaterOrEqual = 0xd,
+ LessOrEqual = 0xe,
+ Greater = 0xf
+ }
+
+ static class ComparisonX86Extensions
+ {
+ public static X86Condition ToX86Condition(this Comparison comp)
+ {
+ return comp switch
+ {
+ Comparison.Equal => X86Condition.Equal,
+ Comparison.NotEqual => X86Condition.NotEqual,
+ Comparison.Greater => X86Condition.Greater,
+ Comparison.LessOrEqual => X86Condition.LessOrEqual,
+ Comparison.GreaterUI => X86Condition.Above,
+ Comparison.LessOrEqualUI => X86Condition.BelowOrEqual,
+ Comparison.GreaterOrEqual => X86Condition.GreaterOrEqual,
+ Comparison.Less => X86Condition.Less,
+ Comparison.GreaterOrEqualUI => X86Condition.AboveOrEqual,
+ Comparison.LessUI => X86Condition.Below,
+
+ _ => throw new ArgumentException(null, nameof(comp))
+ };
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/X86Instruction.cs b/src/ARMeilleure/CodeGen/X86/X86Instruction.cs
new file mode 100644
index 00000000..9a85c516
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Instruction.cs
@@ -0,0 +1,231 @@
+namespace ARMeilleure.CodeGen.X86
+{
+ enum X86Instruction
+ {
+ None,
+ Add,
+ Addpd,
+ Addps,
+ Addsd,
+ Addss,
+ Aesdec,
+ Aesdeclast,
+ Aesenc,
+ Aesenclast,
+ Aesimc,
+ And,
+ Andnpd,
+ Andnps,
+ Andpd,
+ Andps,
+ Blendvpd,
+ Blendvps,
+ Bsr,
+ Bswap,
+ Call,
+ Cmovcc,
+ Cmp,
+ Cmppd,
+ Cmpps,
+ Cmpsd,
+ Cmpss,
+ Cmpxchg,
+ Cmpxchg16b,
+ Cmpxchg8,
+ Comisd,
+ Comiss,
+ Crc32,
+ Crc32_16,
+ Crc32_8,
+ Cvtdq2pd,
+ Cvtdq2ps,
+ Cvtpd2dq,
+ Cvtpd2ps,
+ Cvtps2dq,
+ Cvtps2pd,
+ Cvtsd2si,
+ Cvtsd2ss,
+ Cvtsi2sd,
+ Cvtsi2ss,
+ Cvtss2sd,
+ Cvtss2si,
+ Div,
+ Divpd,
+ Divps,
+ Divsd,
+ Divss,
+ Gf2p8affineqb,
+ Haddpd,
+ Haddps,
+ Idiv,
+ Imul,
+ Imul128,
+ Insertps,
+ Jmp,
+ Ldmxcsr,
+ Lea,
+ Maxpd,
+ Maxps,
+ Maxsd,
+ Maxss,
+ Minpd,
+ Minps,
+ Minsd,
+ Minss,
+ Mov,
+ Mov16,
+ Mov8,
+ Movd,
+ Movdqu,
+ Movhlps,
+ Movlhps,
+ Movq,
+ Movsd,
+ Movss,
+ Movsx16,
+ Movsx32,
+ Movsx8,
+ Movzx16,
+ Movzx8,
+ Mul128,
+ Mulpd,
+ Mulps,
+ Mulsd,
+ Mulss,
+ Neg,
+ Not,
+ Or,
+ Paddb,
+ Paddd,
+ Paddq,
+ Paddw,
+ Palignr,
+ Pand,
+ Pandn,
+ Pavgb,
+ Pavgw,
+ Pblendvb,
+ Pclmulqdq,
+ Pcmpeqb,
+ Pcmpeqd,
+ Pcmpeqq,
+ Pcmpeqw,
+ Pcmpgtb,
+ Pcmpgtd,
+ Pcmpgtq,
+ Pcmpgtw,
+ Pextrb,
+ Pextrd,
+ Pextrq,
+ Pextrw,
+ Pinsrb,
+ Pinsrd,
+ Pinsrq,
+ Pinsrw,
+ Pmaxsb,
+ Pmaxsd,
+ Pmaxsw,
+ Pmaxub,
+ Pmaxud,
+ Pmaxuw,
+ Pminsb,
+ Pminsd,
+ Pminsw,
+ Pminub,
+ Pminud,
+ Pminuw,
+ Pmovsxbw,
+ Pmovsxdq,
+ Pmovsxwd,
+ Pmovzxbw,
+ Pmovzxdq,
+ Pmovzxwd,
+ Pmulld,
+ Pmullw,
+ Pop,
+ Popcnt,
+ Por,
+ Pshufb,
+ Pshufd,
+ Pslld,
+ Pslldq,
+ Psllq,
+ Psllw,
+ Psrad,
+ Psraw,
+ Psrld,
+ Psrlq,
+ Psrldq,
+ Psrlw,
+ Psubb,
+ Psubd,
+ Psubq,
+ Psubw,
+ Punpckhbw,
+ Punpckhdq,
+ Punpckhqdq,
+ Punpckhwd,
+ Punpcklbw,
+ Punpckldq,
+ Punpcklqdq,
+ Punpcklwd,
+ Push,
+ Pxor,
+ Rcpps,
+ Rcpss,
+ Ror,
+ Roundpd,
+ Roundps,
+ Roundsd,
+ Roundss,
+ Rsqrtps,
+ Rsqrtss,
+ Sar,
+ Setcc,
+ Sha256Msg1,
+ Sha256Msg2,
+ Sha256Rnds2,
+ Shl,
+ Shr,
+ Shufpd,
+ Shufps,
+ Sqrtpd,
+ Sqrtps,
+ Sqrtsd,
+ Sqrtss,
+ Stmxcsr,
+ Sub,
+ Subpd,
+ Subps,
+ Subsd,
+ Subss,
+ Test,
+ Unpckhpd,
+ Unpckhps,
+ Unpcklpd,
+ Unpcklps,
+ Vblendvpd,
+ Vblendvps,
+ Vcvtph2ps,
+ Vcvtps2ph,
+ Vfmadd231pd,
+ Vfmadd231ps,
+ Vfmadd231sd,
+ Vfmadd231ss,
+ Vfmsub231sd,
+ Vfmsub231ss,
+ Vfnmadd231pd,
+ Vfnmadd231ps,
+ Vfnmadd231sd,
+ Vfnmadd231ss,
+ Vfnmsub231sd,
+ Vfnmsub231ss,
+ Vpblendvb,
+ Vpternlogd,
+ Xor,
+ Xorpd,
+ Xorps,
+
+ Count
+ }
+}
\ No newline at end of file
diff --git a/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs b/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs
new file mode 100644
index 00000000..98a19b9a
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs
@@ -0,0 +1,259 @@
+using ARMeilleure.CodeGen.Optimizations;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Translation;
+using System.Collections.Generic;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+using static ARMeilleure.IntermediateRepresentation.Operation.Factory;
+
+namespace ARMeilleure.CodeGen.X86
+{
+ static class X86Optimizer
+ {
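+ // Cap on how many uses a hoisted constant local may accumulate. Operand
+ // use counts are stored in 16 bits, so stay well below ushort.MaxValue.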
+ private const int MaxConstantUses = 10000;
+
+ public static void RunPass(ControlFlowGraph cfg)
+ {
+ var constants = new Dictionary<ulong, Operand>();
+
+ Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source)
+ {
+ // If the constant has many uses, we also force a new constant mov to be added,
+ // in order to avoid overflowing the use count field (which is limited to 16 bits).
+ if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses)
+ {
+ constant = Local(source.Type);
+
+ Operation copyOp = Operation(Instruction.Copy, constant, source);
+
+ block.Operations.AddBefore(operation, copyOp);
+
+ constants[source.Value] = constant;
+ }
+
+ return constant;
+ }
+
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext)
+ {
+ constants.Clear();
+
+ Operation nextNode;
+
+ for (Operation node = block.Operations.First; node != default; node = nextNode)
+ {
+ nextNode = node.ListNext;
+
+ // Insert copies for constants that can't fit in a 32-bit immediate.
+ // Doing this early unblocks a few optimizations.
+ if (node.Instruction == Instruction.Add)
+ {
+ Operand src1 = node.GetSource(0);
+ Operand src2 = node.GetSource(1);
+
+ if (src1.Kind == OperandKind.Constant && (src1.Relocatable || CodeGenCommon.IsLongConst(src1)))
+ {
+ node.SetSource(0, GetConstantCopy(block, node, src1));
+ }
+
+ if (src2.Kind == OperandKind.Constant && (src2.Relocatable || CodeGenCommon.IsLongConst(src2)))
+ {
+ node.SetSource(1, GetConstantCopy(block, node, src2));
+ }
+ }
+
+ // Try to fold something like:
+ // shl rbx, 2
+ // add rax, rbx
+ // add rax, 0xcafe
+ // mov rax, [rax]
+ // Into:
+ // mov rax, [rax+rbx*4+0xcafe]
+ if (IsMemoryLoadOrStore(node.Instruction))
+ {
+ OperandType type;
+
+ if (node.Destination != default)
+ {
+ type = node.Destination.Type;
+ }
+ else
+ {
+ type = node.GetSource(1).Type;
+ }
+
+ Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type);
+
+ if (memOp != default)
+ {
+ node.SetSource(0, memOp);
+ }
+ }
+ }
+ }
+
+ Optimizer.RemoveUnusedNodes(cfg);
+ }
+
+ private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type)
+ {
+ Operand baseOp = addr;
+
+ // First we check if the address is the result of adding a 32-bit immediate
+ // to a local X. If so, baseOp becomes X and the memory operand immediate
+ // becomes the addition's immediate. Otherwise baseOp remains the address.
+ int imm = GetConstOp(ref baseOp);
+
+ // Next we check if baseOp is the result of adding local Y to local Z.
+ // If so, we set baseOp to Y and indexOp to Z. We further check whether
+ // Z is the result of left-shifting a local W by a value between 0 and 3;
+ // if it is, we set indexOp to W instead and adjust the memory operand's
+ // scale to match the shift.
+ // One case is missed: the address itself being a shift result. That is
+ // probably not worth optimizing, as it should never happen.
+ (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp);
+
+ // If baseOp is still equal to the address, there is nothing to optimize.
+ if (baseOp == addr)
+ {
+ return default;
+ }
+
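+ // If no displacement was folded out of the base, try folding one out of
+ // the index instead.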
+ if (imm == 0 && scale == Multiplier.x1 && indexOp != default)
+ {
+ imm = GetConstOp(ref indexOp);
+ }
+
+ return MemoryOp(type, baseOp, indexOp, scale, imm);
+ }
+
+ private static int GetConstOp(ref Operand baseOp)
+ {
+ Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (operation == default)
+ {
+ return 0;
+ }
+
+ Operand src1 = operation.GetSource(0);
+ Operand src2 = operation.GetSource(1);
+
+ Operand constOp;
+ Operand otherOp;
+
+ if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable)
+ {
+ constOp = src1;
+ otherOp = src2;
+ }
+ else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant)
+ {
+ constOp = src2;
+ otherOp = src1;
+ }
+ else
+ {
+ return 0;
+ }
+
+ // If we have an addition with a 64-bit constant, we can't optimize it further,
+ // as a 64-bit immediate can't be encoded in the memory operand.
+ if (CodeGenCommon.IsLongConst(constOp))
+ {
+ return 0;
+ }
+
+ baseOp = otherOp;
+
+ return constOp.AsInt32();
+ }
+
+ private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp)
+ {
+ Operand indexOp = default;
+
+ Multiplier scale = Multiplier.x1;
+
+ Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add);
+
+ if (addOp == default)
+ {
+ return (indexOp, scale);
+ }
+
+ Operand src1 = addOp.GetSource(0);
+ Operand src2 = addOp.GetSource(1);
+
+ if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable)
+ {
+ return (indexOp, scale);
+ }
+
+ baseOp = src1;
+ indexOp = src2;
+
+ Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft);
+
+ bool indexOnSrc2 = false;
+
+ if (shlOp == default)
+ {
+ shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft);
+
+ indexOnSrc2 = true;
+ }
+
+ if (shlOp != default)
+ {
+ Operand shSrc = shlOp.GetSource(0);
+ Operand shift = shlOp.GetSource(1);
+
+ if (shSrc.Kind == OperandKind.LocalVariable && shift.Kind == OperandKind.Constant && shift.Value <= 3)
+ {
+ scale = shift.Value switch
+ {
+ 1 => Multiplier.x2,
+ 2 => Multiplier.x4,
+ 3 => Multiplier.x8,
+ _ => Multiplier.x1
+ };
+
+ baseOp = indexOnSrc2 ? src1 : src2;
+ indexOp = shSrc;
+ }
+ }
+
+ return (indexOp, scale);
+ }
+
+ private static Operation GetAsgOpWithInst(Operand op, Instruction inst)
+ {
+ // If we have multiple assignments, folding is not safe
+ // as the value may be different depending on the
+ // control flow path.
+ if (op.AssignmentsCount != 1)
+ {
+ return default;
+ }
+
+ Operation asgOp = op.Assignments[0];
+
+ if (asgOp.Instruction != inst)
+ {
+ return default;
+ }
+
+ return asgOp;
+ }
+
+ private static bool IsMemoryLoadOrStore(Instruction inst)
+ {
+ return inst == Instruction.Load ||
+ inst == Instruction.Load16 ||
+ inst == Instruction.Load8 ||
+ inst == Instruction.Store ||
+ inst == Instruction.Store16 ||
+ inst == Instruction.Store8;
+ }
+ }
+}
diff --git a/src/ARMeilleure/CodeGen/X86/X86Register.cs b/src/ARMeilleure/CodeGen/X86/X86Register.cs
new file mode 100644
index 00000000..01f63e31
--- /dev/null
+++ b/src/ARMeilleure/CodeGen/X86/X86Register.cs
@@ -0,0 +1,41 @@
+namespace ARMeilleure.CodeGen.X86
+{
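+ // GPR and XMM indices overlap (Rax and Xmm0 are both 0); the operand's
+ // RegisterType distinguishes the two register files.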
+ enum X86Register
+ {
+ Invalid = -1,
+
+ Rax = 0,
+ Rcx = 1,
+ Rdx = 2,
+ Rbx = 3,
+ Rsp = 4,
+ Rbp = 5,
+ Rsi = 6,
+ Rdi = 7,
+ R8 = 8,
+ R9 = 9,
+ R10 = 10,
+ R11 = 11,
+ R12 = 12,
+ R13 = 13,
+ R14 = 14,
+ R15 = 15,
+
+ Xmm0 = 0,
+ Xmm1 = 1,
+ Xmm2 = 2,
+ Xmm3 = 3,
+ Xmm4 = 4,
+ Xmm5 = 5,
+ Xmm6 = 6,
+ Xmm7 = 7,
+ Xmm8 = 8,
+ Xmm9 = 9,
+ Xmm10 = 10,
+ Xmm11 = 11,
+ Xmm12 = 12,
+ Xmm13 = 13,
+ Xmm14 = 14,
+ Xmm15 = 15
+ }
+}
\ No newline at end of file