| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/ARMeilleure | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 | |
Move solution and projects to src
Diffstat (limited to 'src/ARMeilleure')
417 files changed, 68496 insertions, 0 deletions
diff --git a/src/ARMeilleure/ARMeilleure.csproj b/src/ARMeilleure/ARMeilleure.csproj new file mode 100644 index 00000000..fa555115 --- /dev/null +++ b/src/ARMeilleure/ARMeilleure.csproj @@ -0,0 +1,26 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <TargetFramework>net7.0</TargetFramework> + <AllowUnsafeBlocks>true</AllowUnsafeBlocks> + </PropertyGroup> + + <ItemGroup> + <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" /> + <ProjectReference Include="..\Ryujinx.Memory\Ryujinx.Memory.csproj" /> + </ItemGroup> + + <ItemGroup> + <ContentWithTargetPath Include="Native\libs\libarmeilleure-jitsupport.dylib" Condition="'$(RuntimeIdentifier)' == '' OR '$(RuntimeIdentifier)' == 'osx-arm64'"> + <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> + <TargetPath>libarmeilleure-jitsupport.dylib</TargetPath> + </ContentWithTargetPath> + </ItemGroup> + + <ItemGroup> + <AssemblyAttribute Include="System.Runtime.CompilerServices.InternalsVisibleTo"> + <_Parameter1>Ryujinx.Tests</_Parameter1> + </AssemblyAttribute> + </ItemGroup> + +</Project> diff --git a/src/ARMeilleure/Allocators.cs b/src/ARMeilleure/Allocators.cs new file mode 100644 index 00000000..deabf9a2 --- /dev/null +++ b/src/ARMeilleure/Allocators.cs @@ -0,0 +1,42 @@ +using ARMeilleure.Common; +using System; +using System.Runtime.CompilerServices; + +namespace ARMeilleure +{ + static class Allocators + { + [ThreadStatic] private static ArenaAllocator _default; + [ThreadStatic] private static ArenaAllocator _operands; + [ThreadStatic] private static ArenaAllocator _operations; + [ThreadStatic] private static ArenaAllocator _references; + [ThreadStatic] private static ArenaAllocator _liveRanges; + [ThreadStatic] private static ArenaAllocator _liveIntervals; + + public static ArenaAllocator Default => GetAllocator(ref _default, 256 * 1024, 4); + public static ArenaAllocator Operands => GetAllocator(ref _operands, 64 * 1024, 8); + public static ArenaAllocator Operations => GetAllocator(ref _operations, 64 * 1024, 8); + public static ArenaAllocator References => GetAllocator(ref _references, 64 * 1024, 8); + public static ArenaAllocator LiveRanges => GetAllocator(ref _liveRanges, 64 * 1024, 8); + public static ArenaAllocator LiveIntervals => GetAllocator(ref _liveIntervals, 64 * 1024, 8); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ArenaAllocator GetAllocator(ref ArenaAllocator alloc, uint pageSize, uint pageCount) + { + if (alloc == null) + { + alloc = new ArenaAllocator(pageSize, pageCount); + } + + return alloc; + } + + public static void ResetAll() + { + Default.Reset(); + Operands.Reset(); + Operations.Reset(); + References.Reset(); + } + } +} diff --git a/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs b/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs new file mode 100644 index 00000000..fdd4d024 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/Arm64Optimizer.cs @@ -0,0 +1,270 @@ +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Collections.Generic; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class Arm64Optimizer + { + private const int MaxConstantUses = 10000; + + public static void RunPass(ControlFlowGraph cfg) + { + var constants = new Dictionary<ulong, Operand>(); + + Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source) 
+ { + // If the constant has many uses, we also force a new constant mov to be added, in order + // to avoid overflow of the counts field (which is limited to 16 bits). + if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses) + { + constant = Local(source.Type); + + Operation copyOp = Operation(Instruction.Copy, constant, source); + + block.Operations.AddBefore(operation, copyOp); + + constants[source.Value] = constant; + } + + return constant; + } + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + constants.Clear(); + + Operation nextNode; + + for (Operation node = block.Operations.First; node != default; node = nextNode) + { + nextNode = node.ListNext; + + // Insert copies for constants that can't fit in a 32-bit immediate. + // Doing this early unblocks a few optimizations. + if (node.Instruction == Instruction.Add) + { + Operand src1 = node.GetSource(0); + Operand src2 = node.GetSource(1); + + if (src1.Kind == OperandKind.Constant && (src1.Relocatable || ConstTooLong(src1, OperandType.I32))) + { + node.SetSource(0, GetConstantCopy(block, node, src1)); + } + + if (src2.Kind == OperandKind.Constant && (src2.Relocatable || ConstTooLong(src2, OperandType.I32))) + { + node.SetSource(1, GetConstantCopy(block, node, src2)); + } + } + + // Try to fold something like: + // lsl x1, x1, #2 + // add x0, x0, x1 + // ldr x0, [x0] + // add x2, x2, #16 + // ldr x2, [x2] + // Into: + // ldr x0, [x0, x1, lsl #2] + // ldr x2, [x2, #16] + if (IsMemoryLoadOrStore(node.Instruction)) + { + OperandType type; + + if (node.Destination != default) + { + type = node.Destination.Type; + } + else + { + type = node.GetSource(1).Type; + } + + Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type); + + if (memOp != default) + { + node.SetSource(0, memOp); + } + } + } + } + + Optimizer.RemoveUnusedNodes(cfg); + } + + private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type) + { + Operand baseOp = addr; + + // First we check if the address is the result of a local X with immediate + // addition. If that is the case, then the baseOp is X, and the memory operand immediate + // becomes the addition immediate. Otherwise baseOp remains the address. + int imm = GetConstOp(ref baseOp, type); + if (imm != 0) + { + return MemoryOp(type, baseOp, default, Multiplier.x1, imm); + } + + // Now we check if the baseOp is the result of a local Y with a local Z addition. + // If that is the case, we now set baseOp to Y and indexOp to Z. We further check + // if Z is the result of a left shift of local W by a value == 0 or == Log2(AccessSize); + // if that is the case, we set indexOp to W and adjust the scale value of the memory operand + // to match that of the left shift. + // There is one missed case, which is the address being a shift result, but this is + // probably not worth optimizing as it should never happen. + (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp, type); + + // If baseOp is still equal to the address, then there's nothing that can be optimized.
+ if (baseOp == addr) + { + return default; + } + + return MemoryOp(type, baseOp, indexOp, scale, 0); + } + + private static int GetConstOp(ref Operand baseOp, OperandType accessType) + { + Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add); + + if (operation == default) + { + return 0; + } + + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + Operand constOp; + Operand otherOp; + + if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable) + { + constOp = src1; + otherOp = src2; + } + else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant) + { + constOp = src2; + otherOp = src1; + } + else + { + return 0; + } + + // If we have addition by a constant that we can't encode on the instruction, + // then we can't optimize it further. + if (ConstTooLong(constOp, accessType)) + { + return 0; + } + + baseOp = otherOp; + + return constOp.AsInt32(); + } + + private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp, OperandType accessType) + { + Operand indexOp = default; + + Multiplier scale = Multiplier.x1; + + Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add); + + if (addOp == default) + { + return (indexOp, scale); + } + + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable) + { + return (indexOp, scale); + } + + baseOp = src1; + indexOp = src2; + + Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft); + + bool indexOnSrc2 = false; + + if (shlOp == default) + { + shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft); + + indexOnSrc2 = true; + } + + if (shlOp != default) + { + Operand shSrc = shlOp.GetSource(0); + Operand shift = shlOp.GetSource(1); + + int maxShift = Assembler.GetScaleForType(accessType); + + if (shSrc.Kind == OperandKind.LocalVariable && + shift.Kind == OperandKind.Constant && + (shift.Value == 0 || shift.Value == (ulong)maxShift)) + { + scale = shift.Value switch + { + 1 => Multiplier.x2, + 2 => Multiplier.x4, + 3 => Multiplier.x8, + 4 => Multiplier.x16, + _ => Multiplier.x1 + }; + + baseOp = indexOnSrc2 ? src1 : src2; + indexOp = shSrc; + } + } + + return (indexOp, scale); + } + + private static Operation GetAsgOpWithInst(Operand op, Instruction inst) + { + // If we have multiple assignments, folding is not safe + // as the value may be different depending on the + // control flow path. 
+ if (op.AssignmentsCount != 1) + { + return default; + } + + Operation asgOp = op.Assignments[0]; + + if (asgOp.Instruction != inst) + { + return default; + } + + return asgOp; + } + + private static bool IsMemoryLoadOrStore(Instruction inst) + { + return inst == Instruction.Load || inst == Instruction.Store; + } + + private static bool ConstTooLong(Operand constOp, OperandType accessType) + { + if ((uint)constOp.Value != constOp.Value) + { + return true; + } + + return !CodeGenCommon.ConstFitsOnUImm12(constOp.AsInt32(), accessType); + } + } +} diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs b/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs new file mode 100644 index 00000000..db27a810 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/ArmCondition.cs @@ -0,0 +1,47 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + +namespace ARMeilleure.CodeGen.Arm64 +{ + enum ArmCondition + { + Eq = 0, + Ne = 1, + GeUn = 2, + LtUn = 3, + Mi = 4, + Pl = 5, + Vs = 6, + Vc = 7, + GtUn = 8, + LeUn = 9, + Ge = 10, + Lt = 11, + Gt = 12, + Le = 13, + Al = 14, + Nv = 15 + } + + static class ComparisonArm64Extensions + { + public static ArmCondition ToArmCondition(this Comparison comp) + { + return comp switch + { + Comparison.Equal => ArmCondition.Eq, + Comparison.NotEqual => ArmCondition.Ne, + Comparison.Greater => ArmCondition.Gt, + Comparison.LessOrEqual => ArmCondition.Le, + Comparison.GreaterUI => ArmCondition.GtUn, + Comparison.LessOrEqualUI => ArmCondition.LeUn, + Comparison.GreaterOrEqual => ArmCondition.Ge, + Comparison.Less => ArmCondition.Lt, + Comparison.GreaterOrEqualUI => ArmCondition.GeUn, + Comparison.LessUI => ArmCondition.LtUn, + + _ => throw new ArgumentException(null, nameof(comp)) + }; + } + } +} diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs b/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs new file mode 100644 index 00000000..062a6d0b --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/ArmExtensionType.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.CodeGen.Arm64 +{ + enum ArmExtensionType + { + Uxtb = 0, + Uxth = 1, + Uxtw = 2, + Uxtx = 3, + Sxtb = 4, + Sxth = 5, + Sxtw = 6, + Sxtx = 7 + } +} diff --git a/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs b/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs new file mode 100644 index 00000000..d223a146 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/ArmShiftType.cs @@ -0,0 +1,11 @@ + +namespace ARMeilleure.CodeGen.Arm64 +{ + enum ArmShiftType + { + Lsl = 0, + Lsr = 1, + Asr = 2, + Ror = 3 + } +}
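The `ArmCondition` values above mirror the AArch64 condition-code encoding, in which each even/odd pair of codes are logical inverses (`Eq`/`Ne`, `GeUn`/`LtUn`, `Ge`/`Lt`, and so on). `Assembler.Cset`, further down in this diff, relies on exactly that property by XOR-ing the condition with 1. A minimal sketch of the inversion trick (the `Invert` helper is hypothetical, not part of the commit):

```csharp
// AArch64 packs each condition next to its inverse, so flipping bit 0
// inverts the test (Eq <-> Ne, Lt <-> Ge, GtUn <-> LeUn, ...).
// The Al (14) / Nv (15) pair is the exception: both behave as "always".
static ArmCondition Invert(ArmCondition condition)
{
    return (ArmCondition)((int)condition ^ 1);
}
```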
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/Assembler.cs b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs new file mode 100644 index 00000000..0ec0be7c --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/Assembler.cs @@ -0,0 +1,1160 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Diagnostics; +using System.IO; +using static ARMeilleure.IntermediateRepresentation.Operand; + +namespace ARMeilleure.CodeGen.Arm64 +{ + class Assembler + { + public const uint SfFlag = 1u << 31; + + private const int SpRegister = 31; + private const int ZrRegister = 31; + + private readonly Stream _stream; + + public Assembler(Stream stream) + { + _stream = stream; + } + + public void Add(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0) + { + WriteInstructionAuto(0x0b200000u, rd, rn, rm, extensionType, shiftAmount); + } + + public void Add(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0, bool immForm = false) + { + WriteInstructionAuto(0x11000000u, 0x0b000000u, rd, rn, rm, shiftType, shiftAmount, immForm); + } + + public void And(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x12000000u, 0x0a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Ands(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x72000000u, 0x6a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Asr(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 63 : 31; + shift &= mask; + Sbfm(rd, rn, shift, mask); + } + else + { + Asrv(rd, rn, rm); + } + } + + public void Asrv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02800u, rd, rn, rm); + } + + public void B(int imm) + { + WriteUInt32(0x14000000u | EncodeSImm26_2(imm)); + } + + public void B(ArmCondition condition, int imm) + { + WriteUInt32(0x54000000u | (uint)condition | (EncodeSImm19_2(imm) << 5)); + } + + public void Blr(Operand rn) + { + WriteUInt32(0xd63f0000u | (EncodeReg(rn) << 5)); + } + + public void Br(Operand rn) + { + WriteUInt32(0xd61f0000u | (EncodeReg(rn) << 5)); + } + + public void Brk() + { + WriteUInt32(0xd4200000u); + } + + public void Cbz(Operand rt, int imm) + { + WriteInstructionAuto(0x34000000u | (EncodeSImm19_2(imm) << 5), rt); + } + + public void Cbnz(Operand rt, int imm) + { + WriteInstructionAuto(0x35000000u | (EncodeSImm19_2(imm) << 5), rt); + } + + public void Clrex(int crm = 15) + { + WriteUInt32(0xd503305fu | (EncodeUImm4(crm) << 8)); + } + + public void Clz(Operand rd, Operand rn) + { + WriteInstructionAuto(0x5ac01000u, rd, rn); + } + + public void CmeqVector(Operand rd, Operand rn, Operand rm, int size, bool q = true) + { + Debug.Assert((uint)size < 4); + WriteSimdInstruction(0x2e208c00u | ((uint)size << 22), rd, rn, rm, q); + } + + public void Cmp(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Subs(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount); + } + + public void Csel(Operand rd, Operand rn, Operand rm, ArmCondition condition) + { + WriteInstructionBitwiseAuto(0x1a800000u | ((uint)condition << 12), rd, rn, rm); + } + + public void Cset(Operand rd, ArmCondition condition) + { + var zr = 
Factory.Register(ZrRegister, RegisterType.Integer, rd.Type); + Csinc(rd, zr, zr, (ArmCondition)((int)condition ^ 1)); + } + + public void Csinc(Operand rd, Operand rn, Operand rm, ArmCondition condition) + { + WriteInstructionBitwiseAuto(0x1a800400u | ((uint)condition << 12), rd, rn, rm); + } + + public void Dmb(uint option) + { + WriteUInt32(0xd50330bfu | (option << 8)); + } + + public void DupScalar(Operand rd, Operand rn, int index, int size) + { + WriteInstruction(0x5e000400u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn); + } + + public void Eor(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x52000000u, 0x4a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void EorVector(Operand rd, Operand rn, Operand rm, bool q = true) + { + WriteSimdInstruction(0x2e201c00u, rd, rn, rm, q); + } + + public void Extr(Operand rd, Operand rn, Operand rm, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionBitwiseAuto(0x13800000u | n | (EncodeUImm6(imms) << 10), rd, rn, rm); + } + + public void FaddScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e202800u, rd, rn, rm); + } + + public void FcvtScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e224000u | (rd.Type == OperandType.FP64 ? 1u << 15 : 1u << 22); + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void FdivScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e201800u, rd, rn, rm); + } + + public void Fmov(Operand rd, Operand rn) + { + WriteFPInstructionAuto(0x1e204000u, rd, rn); + } + + public void Fmov(Operand rd, Operand rn, bool topHalf) + { + Debug.Assert(rd.Type.IsInteger() != rn.Type.IsInteger()); + Debug.Assert(rd.Type == OperandType.I64 || rn.Type == OperandType.I64 || !topHalf); + + uint opcode = rd.Type.IsInteger() ? 0b110u : 0b111u; + + uint rmode = topHalf ? 1u << 19 : 0u; + uint ftype = rd.Type == OperandType.FP64 || rn.Type == OperandType.FP64 ? 1u << 22 : 0u; + uint sf = rd.Type == OperandType.I64 || rn.Type == OperandType.I64 ? SfFlag : 0u; + + WriteUInt32(0x1e260000u | (opcode << 16) | rmode | ftype | sf | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void FmulScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e200800u, rd, rn, rm); + } + + public void FnegScalar(Operand rd, Operand rn) + { + WriteFPInstructionAuto(0x1e214000u, rd, rn); + } + + public void FsubScalar(Operand rd, Operand rn, Operand rm) + { + WriteFPInstructionAuto(0x1e203800u, rd, rn, rm); + } + + public void Ins(Operand rd, Operand rn, int index, int size) + { + WriteInstruction(0x4e001c00u | (EncodeIndexSizeImm5(index, size) << 16), rd, rn); + } + + public void Ins(Operand rd, Operand rn, int srcIndex, int dstIndex, int size) + { + uint imm4 = (uint)srcIndex << size; + Debug.Assert((uint)srcIndex < (16u >> size)); + WriteInstruction(0x6e000400u | (imm4 << 11) | (EncodeIndexSizeImm5(dstIndex, size) << 16), rd, rn); + } + + public void Ldaxp(Operand rt, Operand rt2, Operand rn) + { + WriteInstruction(0x887f8000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rt2); + } + + public void Ldaxr(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u | ((rt.Type == OperandType.I64 ? 
3u : 2u) << 30), rt, rn); + } + + public void Ldaxrb(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u, rt, rn); + } + + public void Ldaxrh(Operand rt, Operand rn) + { + WriteInstruction(0x085ffc00u | (1u << 30), rt, rn); + } + + public void LdpRiPost(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x28c00000u, 0x2cc00000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void LdpRiPre(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29c00000u, 0x2dc00000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void LdpRiUn(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29400000u, 0x2d400000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void Ldr(Operand rt, Operand rn) + { + if (rn.Kind == OperandKind.Memory) + { + MemoryOperand memOp = rn.GetMemory(); + + if (memOp.Index != default) + { + Debug.Assert(memOp.Displacement == 0); + Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type)); + LdrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1); + } + else + { + LdrRiUn(rt, memOp.BaseAddress, memOp.Displacement); + } + } + else + { + LdrRiUn(rt, rn, 0); + } + } + + public void LdrLit(Operand rt, int offset) + { + uint instruction = 0x18000000u | (EncodeSImm19_2(offset) << 5); + + if (rt.Type == OperandType.I64) + { + instruction |= 1u << 30; + } + + WriteInstruction(instruction, rt); + } + + public void LdrRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400400u, 0x3c400400u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400c00u, 0x3c400c00u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb9400000u, 0x3d400000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void LdrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift) + { + uint instruction = GetLdrStrInstruction(0xb8600800u, 0x3ce00800u, rt.Type); + WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift); + } + + public void LdrbRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38400400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrbRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38400c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrbRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x39400000u | (EncodeUImm12(imm, 0) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78400400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78400c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void LdrhRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x79400000u | (EncodeUImm12(imm, 1) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void 
Ldur(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8400000u, 0x3c400000u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void Lsl(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 63 : 31; + shift &= mask; + Ubfm(rd, rn, -shift & mask, mask - shift); + } + else + { + Lslv(rd, rn, rm); + } + } + + public void Lslv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02000u, rd, rn, rm); + } + + public void Lsr(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 63 : 31; + shift &= mask; + Ubfm(rd, rn, shift, mask); + } + else + { + Lsrv(rd, rn, rm); + } + } + + public void Lsrv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02400u, rd, rn, rm); + } + + public void Madd(Operand rd, Operand rn, Operand rm, Operand ra) + { + WriteInstructionAuto(0x1b000000u, rd, rn, rm, ra); + } + + public void Mul(Operand rd, Operand rn, Operand rm) + { + Madd(rd, rn, rm, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type)); + } + + public void Mov(Operand rd, Operand rn) + { + if (rd.Type.IsInteger()) + { + Orr(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn); + } + else + { + OrrVector(rd, rn, rn); + } + } + + public void MovSp(Operand rd, Operand rn) + { + if (rd.GetRegister().Index == SpRegister || + rn.GetRegister().Index == SpRegister) + { + Add(rd, rn, Factory.Const(rd.Type, 0), immForm: true); + } + else + { + Mov(rd, rn); + } + } + + public void Mov(Operand rd, int imm) + { + Movz(rd, imm, 0); + } + + public void Movz(Operand rd, int imm, int hw) + { + Debug.Assert((hw & (rd.Type == OperandType.I64 ? 3 : 1)) == hw); + WriteInstructionAuto(0x52800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd); + } + + public void Movk(Operand rd, int imm, int hw) + { + Debug.Assert((hw & (rd.Type == OperandType.I64 ? 
3 : 1)) == hw); + WriteInstructionAuto(0x72800000u | (EncodeUImm16(imm) << 5) | ((uint)hw << 21), rd); + } + + public void Mrs(Operand rt, uint o0, uint op1, uint crn, uint crm, uint op2) + { + uint instruction = 0xd5300000u; + + instruction |= (op2 & 7) << 5; + instruction |= (crm & 15) << 8; + instruction |= (crn & 15) << 12; + instruction |= (op1 & 7) << 16; + instruction |= (o0 & 1) << 19; + + WriteInstruction(instruction, rt); + } + + public void Mvn(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Orn(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount); + } + + public void Neg(Operand rd, Operand rn, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Sub(rd, Factory.Register(ZrRegister, RegisterType.Integer, rd.Type), rn, shiftType, shiftAmount); + } + + public void Orn(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x2a200000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Orr(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionBitwiseAuto(0x32000000u, 0x2a000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void OrrVector(Operand rd, Operand rn, Operand rm, bool q = true) + { + WriteSimdInstruction(0x0ea01c00u, rd, rn, rm, q); + } + + public void Ret(Operand rn) + { + WriteUInt32(0xd65f0000u | (EncodeReg(rn) << 5)); + } + + public void Rev(Operand rd, Operand rn) + { + uint opc0 = rd.Type == OperandType.I64 ? 1u << 10 : 0u; + WriteInstructionAuto(0x5ac00800u | opc0, rd, rn); + } + + public void Ror(Operand rd, Operand rn, Operand rm) + { + if (rm.Kind == OperandKind.Constant) + { + int shift = rm.AsInt32(); + int mask = rd.Type == OperandType.I64 ? 63 : 31; + shift &= mask; + Extr(rd, rn, rn, shift); + } + else + { + Rorv(rd, rn, rm); + } + } + + public void Rorv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionBitwiseAuto(0x1ac02c00u, rd, rn, rm); + } + + public void Sbfm(Operand rd, Operand rn, int immr, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionAuto(0x13000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn); + } + + public void ScvtfScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e220000u; + + if (rn.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteFPInstructionAuto(instruction, rd, rn); + } + + public void Sdiv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16Auto(0x1ac00c00u, rd, rn, rm); + } + + public void Smulh(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16(0x9b407c00u, rd, rn, rm); + } + + public void Stlxp(Operand rt, Operand rt2, Operand rn, Operand rs) + { + WriteInstruction(0x88208000u | ((rt.Type == OperandType.I64 ? 3u : 2u) << 30), rt, rn, rs, rt2); + } + + public void Stlxr(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u | ((rt.Type == OperandType.I64 ? 
3u : 2u) << 30), rt, rn, rs); + } + + public void Stlxrb(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u, rt, rn, rs); + } + + public void Stlxrh(Operand rt, Operand rn, Operand rs) + { + WriteInstructionRm16(0x0800fc00u | (1u << 30), rt, rn, rs); + } + + public void StpRiPost(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x28800000u, 0x2c800000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void StpRiPre(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29800000u, 0x2d800000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void StpRiUn(Operand rt, Operand rt2, Operand rn, int imm) + { + uint instruction = GetLdpStpInstruction(0x29000000u, 0x2d000000u, imm, rt.Type); + WriteInstruction(instruction, rt, rn, rt2); + } + + public void Str(Operand rt, Operand rn) + { + if (rn.Kind == OperandKind.Memory) + { + MemoryOperand memOp = rn.GetMemory(); + + if (memOp.Index != default) + { + Debug.Assert(memOp.Displacement == 0); + Debug.Assert(memOp.Scale == Multiplier.x1 || (int)memOp.Scale == GetScaleForType(rt.Type)); + StrRr(rt, memOp.BaseAddress, memOp.Index, ArmExtensionType.Uxtx, memOp.Scale != Multiplier.x1); + } + else + { + StrRiUn(rt, memOp.BaseAddress, memOp.Displacement); + } + } + else + { + StrRiUn(rt, rn, 0); + } + } + + public void StrRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000400u, 0x3c000400u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000c00u, 0x3c000c00u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb9000000u, 0x3d000000u, rt.Type) | (EncodeUImm12(imm, rt.Type) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void StrRr(Operand rt, Operand rn, Operand rm, ArmExtensionType extensionType, bool shift) + { + uint instruction = GetLdrStrInstruction(0xb8200800u, 0x3ca00800u, rt.Type); + WriteInstructionLdrStrAuto(instruction, rt, rn, rm, extensionType, shift); + } + + public void StrbRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38000400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrbRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x38000c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrbRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x39000000u | (EncodeUImm12(imm, 0) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiPost(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78000400u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiPre(Operand rt, Operand rn, int imm) + { + uint instruction = 0x78000c00u | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, rn); + } + + public void StrhRiUn(Operand rt, Operand rn, int imm) + { + uint instruction = 0x79000000u | (EncodeUImm12(imm, 1) << 10); + WriteInstruction(instruction, rt, rn); + } + + public void Stur(Operand rt, Operand rn, int imm) + { + uint instruction = GetLdrStrInstruction(0xb8000000u, 0x3c000000u, rt.Type) | (EncodeSImm9(imm) << 12); + WriteInstruction(instruction, rt, 
rn); + } + + public void Sub(Operand rd, Operand rn, Operand rm, ArmExtensionType extensionType, int shiftAmount = 0) + { + WriteInstructionAuto(0x4b200000u, rd, rn, rm, extensionType, shiftAmount); + } + + public void Sub(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionAuto(0x51000000u, 0x4b000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Subs(Operand rd, Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + WriteInstructionAuto(0x71000000u, 0x6b000000u, rd, rn, rm, shiftType, shiftAmount); + } + + public void Sxtb(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 7); + } + + public void Sxth(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 15); + } + + public void Sxtw(Operand rd, Operand rn) + { + Sbfm(rd, rn, 0, 31); + } + + public void Tst(Operand rn, Operand rm, ArmShiftType shiftType = ArmShiftType.Lsl, int shiftAmount = 0) + { + Ands(Factory.Register(ZrRegister, RegisterType.Integer, rn.Type), rn, rm, shiftType, shiftAmount); + } + + public void Ubfm(Operand rd, Operand rn, int immr, int imms) + { + uint n = rd.Type == OperandType.I64 ? 1u << 22 : 0u; + WriteInstructionAuto(0x53000000u | n | (EncodeUImm6(imms) << 10) | (EncodeUImm6(immr) << 16), rd, rn); + } + + public void UcvtfScalar(Operand rd, Operand rn) + { + uint instruction = 0x1e230000u; + + if (rn.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteFPInstructionAuto(instruction, rd, rn); + } + + public void Udiv(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16Auto(0x1ac00800u, rd, rn, rm); + } + + public void Umov(Operand rd, Operand rn, int index, int size) + { + uint q = size == 3 ? 1u << 30 : 0u; + WriteInstruction(0x0e003c00u | (EncodeIndexSizeImm5(index, size) << 16) | q, rd, rn); + } + + public void Umulh(Operand rd, Operand rn, Operand rm) + { + WriteInstructionRm16(0x9bc07c00u, rd, rn, rm); + } + + public void Uxtb(Operand rd, Operand rn) + { + Ubfm(rd, rn, 0, 7); + } + + public void Uxth(Operand rd, Operand rn) + { + Ubfm(rd, rn, 0, 15); + } + + private void WriteInstructionAuto( + uint instI, + uint instR, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0, + bool immForm = false) + { + if (rm.Kind == OperandKind.Constant && (rm.Value != 0 || immForm)) + { + Debug.Assert(shiftAmount == 0); + int imm = rm.AsInt32(); + Debug.Assert((uint)imm == rm.Value); + if (imm != 0 && (imm & 0xfff) == 0) + { + instI |= 1 << 22; // sh flag + imm >>= 12; + } + WriteInstructionAuto(instI | (EncodeUImm12(imm, 0) << 10), rd, rn); + } + else + { + instR |= EncodeUImm6(shiftAmount) << 10; + instR |= (uint)shiftType << 22; + + WriteInstructionRm16Auto(instR, rd, rn, rm); + } + } + + private void WriteInstructionAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmExtensionType extensionType, + int shiftAmount = 0) + { + Debug.Assert((uint)shiftAmount <= 4); + + instruction |= (uint)shiftAmount << 10; + instruction |= (uint)extensionType << 13; + + WriteInstructionRm16Auto(instruction, rd, rn, rm); + } + + private void WriteInstructionBitwiseAuto( + uint instI, + uint instR, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0) + { + if (rm.Kind == OperandKind.Constant && rm.Value != 0) + { + Debug.Assert(shiftAmount == 0); + bool canEncode = CodeGenCommon.TryEncodeBitMask(rm, out int immN, out int immS, out int immR); + Debug.Assert(canEncode); + uint 
instruction = instI | ((uint)immS << 10) | ((uint)immR << 16) | ((uint)immN << 22); + + WriteInstructionAuto(instruction, rd, rn); + } + else + { + WriteInstructionBitwiseAuto(instR, rd, rn, rm, shiftType, shiftAmount); + } + } + + private void WriteInstructionBitwiseAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmShiftType shiftType = ArmShiftType.Lsl, + int shiftAmount = 0) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + instruction |= EncodeUImm6(shiftAmount) << 10; + instruction |= (uint)shiftType << 22; + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionLdrStrAuto( + uint instruction, + Operand rd, + Operand rn, + Operand rm, + ArmExtensionType extensionType, + bool shift) + { + if (shift) + { + instruction |= 1u << 12; + } + + instruction |= (uint)extensionType << 13; + + if (rd.Type == OperandType.I64) + { + instruction |= 1u << 30; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionAuto(uint instruction, Operand rd) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd); + } + + public void WriteInstructionAuto(uint instruction, Operand rd, Operand rn) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd, rn); + } + + private void WriteInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstruction(instruction, rd, rn, rm, ra); + } + + public void WriteInstruction(uint instruction, Operand rd) + { + WriteUInt32(instruction | EncodeReg(rd)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 10)); + } + + public void WriteInstruction(uint instruction, Operand rd, Operand rn, Operand rm, Operand ra) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(ra) << 10) | (EncodeReg(rm) << 16)); + } + + private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn) + { + if (rd.Type == OperandType.FP64) + { + instruction |= 1u << 22; + } + + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5)); + } + + private void WriteFPInstructionAuto(uint instruction, Operand rd, Operand rn, Operand rm) + { + if (rd.Type == OperandType.FP64) + { + instruction |= 1u << 22; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteSimdInstruction(uint instruction, Operand rd, Operand rn, Operand rm, bool q = true) + { + if (q) + { + instruction |= 1u << 30; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + private void WriteInstructionRm16Auto(uint instruction, Operand rd, Operand rn, Operand rm) + { + if (rd.Type == OperandType.I64) + { + instruction |= SfFlag; + } + + WriteInstructionRm16(instruction, rd, rn, rm); + } + + public void WriteInstructionRm16(uint instruction, Operand rd, Operand rn, Operand rm) + { + WriteUInt32(instruction | EncodeReg(rd) | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16)); + } + + public void WriteInstructionRm16NoRet(uint instruction, Operand rn, Operand rm) + { + WriteUInt32(instruction | (EncodeReg(rn) << 5) | (EncodeReg(rm) << 16)); + } + + private static uint 
GetLdpStpInstruction(uint intInst, uint vecInst, int imm, OperandType type) + { + uint instruction; + int scale; + + if (type.IsInteger()) + { + instruction = intInst; + + if (type == OperandType.I64) + { + instruction |= SfFlag; + scale = 3; + } + else + { + scale = 2; + } + } + else + { + int opc = type switch + { + OperandType.FP32 => 0, + OperandType.FP64 => 1, + _ => 2 + }; + + instruction = vecInst | ((uint)opc << 30); + scale = 2 + opc; + } + + instruction |= (EncodeSImm7(imm, scale) << 15); + + return instruction; + } + + private static uint GetLdrStrInstruction(uint intInst, uint vecInst, OperandType type) + { + uint instruction; + + if (type.IsInteger()) + { + instruction = intInst; + + if (type == OperandType.I64) + { + instruction |= 1 << 30; + } + } + else + { + instruction = vecInst; + + if (type == OperandType.V128) + { + instruction |= 1u << 23; + } + else + { + instruction |= type == OperandType.FP32 ? 2u << 30 : 3u << 30; + } + } + + return instruction; + } + + private static uint EncodeIndexSizeImm5(int index, int size) + { + Debug.Assert((uint)size < 4); + Debug.Assert((uint)index < (16u >> size), $"Invalid index {index} and size {size} combination."); + return ((uint)index << (size + 1)) | (1u << size); + } + + private static uint EncodeSImm7(int value, int scale) + { + uint imm = (uint)(value >> scale) & 0x7f; + Debug.Assert(((int)imm << 25) >> (25 - scale) == value, $"Failed to encode constant 0x{value:X} with scale {scale}."); + return imm; + } + + private static uint EncodeSImm9(int value) + { + uint imm = (uint)value & 0x1ff; + Debug.Assert(((int)imm << 23) >> 23 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeSImm19_2(int value) + { + uint imm = (uint)(value >> 2) & 0x7ffff; + Debug.Assert(((int)imm << 13) >> 11 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeSImm26_2(int value) + { + uint imm = (uint)(value >> 2) & 0x3ffffff; + Debug.Assert(((int)imm << 6) >> 4 == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm4(int value) + { + uint imm = (uint)value & 0xf; + Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm6(int value) + { + uint imm = (uint)value & 0x3f; + Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeUImm12(int value, OperandType type) + { + return EncodeUImm12(value, GetScaleForType(type)); + } + + private static uint EncodeUImm12(int value, int scale) + { + uint imm = (uint)(value >> scale) & 0xfff; + Debug.Assert((int)imm << scale == value, $"Failed to encode constant 0x{value:X} with scale {scale}."); + return imm; + } + + private static uint EncodeUImm16(int value) + { + uint imm = (uint)value & 0xffff; + Debug.Assert((int)imm == value, $"Failed to encode constant 0x{value:X}."); + return imm; + } + + private static uint EncodeReg(Operand reg) + { + if (reg.Kind == OperandKind.Constant && reg.Value == 0) + { + return ZrRegister; + } + + uint regIndex = (uint)reg.GetRegister().Index; + Debug.Assert(reg.Kind == OperandKind.Register); + Debug.Assert(regIndex < 32); + return regIndex; + } + + public static int GetScaleForType(OperandType type) + { + return type switch + { + OperandType.I32 => 2, + OperandType.I64 => 3, + OperandType.FP32 => 2, + OperandType.FP64 => 3, + OperandType.V128 => 4, + _ => throw new ArgumentException($"Invalid type 
{type}.") + }; + } + + private void WriteInt16(short value) + { + WriteUInt16((ushort)value); + } + + private void WriteInt32(int value) + { + WriteUInt32((uint)value); + } + + private void WriteByte(byte value) + { + _stream.WriteByte(value); + } + + private void WriteUInt16(ushort value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + } + + private void WriteUInt32(uint value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + } + } +} diff --git a/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs b/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs new file mode 100644 index 00000000..fda8d786 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/CallingConvention.cs @@ -0,0 +1,96 @@ +using System; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class CallingConvention + { + private const int RegistersMask = unchecked((int)0xffffffff); + + // Some of these registers have specific roles and can't be used as general-purpose registers. + // X18 - Reserved for platform-specific usage. + // X29 - Frame pointer. + // X30 - Return address. + // X31 - Not an actual register; in some cases maps to SP, and in others to ZR. + private const int ReservedRegsMask = (1 << CodeGenCommon.ReservedRegister) | (1 << 18) | (1 << 29) | (1 << 30) | (1 << 31); + + public static int GetIntAvailableRegisters() + { + return RegistersMask & ~ReservedRegsMask; + } + + public static int GetVecAvailableRegisters() + { + return RegistersMask; + } + + public static int GetIntCallerSavedRegisters() + { + return (GetIntCalleeSavedRegisters() ^ RegistersMask) & ~ReservedRegsMask; + } + + public static int GetFpCallerSavedRegisters() + { + return GetFpCalleeSavedRegisters() ^ RegistersMask; + } + + public static int GetVecCallerSavedRegisters() + { + return GetVecCalleeSavedRegisters() ^ RegistersMask; + } + + public static int GetIntCalleeSavedRegisters() + { + return 0x1ff80000; // X19 to X28 + } + + public static int GetFpCalleeSavedRegisters() + { + return 0xff00; // D8 to D15 + } + + public static int GetVecCalleeSavedRegisters() + { + return 0; + } + + public static int GetArgumentsOnRegsCount() + { + return 8; + } + + public static int GetIntArgumentRegister(int index) + { + if ((uint)index < (uint)GetArgumentsOnRegsCount()) + { + return index; + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public static int GetVecArgumentRegister(int index) + { + if ((uint)index < (uint)GetArgumentsOnRegsCount()) + { + return index; + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public static int GetIntReturnRegister() + { + return 0; + } + + public static int GetIntReturnRegisterHigh() + { + return 1; + } + + public static int GetVecReturnRegister() + { + return 0; + } + } +}
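All of the register sets in `CallingConvention` are plain 32-bit masks, with bit *i* standing for register *i*. A quick standalone check (hypothetical snippet, not part of the commit) confirms that the callee-saved mask `0x1ff80000` covers exactly X19 through X28, as the inline comment claims:

```csharp
using System;

class CalleeSavedMaskCheck
{
    static void Main()
    {
        const int calleeSaved = 0x1ff80000; // value returned by GetIntCalleeSavedRegisters()

        for (int i = 0; i < 32; i++)
        {
            if ((calleeSaved & (1 << i)) != 0)
            {
                Console.Write($"X{i} ");
            }
        }

        Console.WriteLine(); // prints: X19 X20 X21 X22 X23 X24 X25 X26 X27 X28
    }
}
```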
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs new file mode 100644 index 00000000..8d1e597b --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenCommon.cs @@ -0,0 +1,91 @@ +using ARMeilleure.IntermediateRepresentation; +using System.Numerics; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class CodeGenCommon + { + public const int TcAddressRegister = 8; + public const int ReservedRegister = 17; + + public static bool ConstFitsOnSImm7(int value, int scale) + { + return (((value >> scale) << 25) >> (25 - scale)) == value; + } + + public static bool ConstFitsOnSImm9(int value) + { + return ((value << 23) >> 23) == value; + } + + public static bool ConstFitsOnUImm12(int value) + { + return (value & 0xfff) == value; + } + + public static bool ConstFitsOnUImm12(int value, OperandType type) + { + int scale = Assembler.GetScaleForType(type); + return (((value >> scale) & 0xfff) << scale) == value; + } + + public static bool TryEncodeBitMask(Operand operand, out int immN, out int immS, out int immR) + { + return TryEncodeBitMask(operand.Type, operand.Value, out immN, out immS, out immR); + } + + public static bool TryEncodeBitMask(OperandType type, ulong value, out int immN, out int immS, out int immR) + { + if (type == OperandType.I32) + { + value |= value << 32; + } + + return TryEncodeBitMask(value, out immN, out immS, out immR); + } + + public static bool TryEncodeBitMask(ulong value, out int immN, out int immS, out int immR) + { + // Some special values also can't be encoded: + // 0 can't be encoded because we need to subtract 1 from the ones count (which would become negative for 0). + // A value with all bits set can't be encoded; it is reserved according to the spec, because: + // Any value AND all ones will equal itself, so it's effectively a no-op. + // Any value OR all ones will equal all ones, so one can just use MOV. + // Any value XOR all ones will equal its inverse, so one can just use MVN. + if (value == 0 || value == ulong.MaxValue) + { + immN = 0; + immS = 0; + immR = 0; + + return false; + } + + // Normalize the value, rotating it such that the LSB is 1: this ensures we get a complete element that has not + // been cut in half across the word boundary. + int rotation = BitOperations.TrailingZeroCount(value & (value + 1)); + ulong rotatedValue = ulong.RotateRight(value, rotation); + + // Now that we have a complete element in the LSB with the LSB = 1, determine size and number of ones + // in the element. + int elementSize = BitOperations.TrailingZeroCount(rotatedValue & (rotatedValue + 1)); + int onesInElement = BitOperations.TrailingZeroCount(~rotatedValue); + + // Check that the value is repeating; this also ensures the element size is a power of two. + if (ulong.RotateRight(value, elementSize) != value) + { + immN = 0; + immS = 0; + immR = 0; + + return false; + } + + immN = (elementSize >> 6) & 1; + immS = (((~elementSize + 1) << 1) | (onesInElement - 1)) & 0x3f; + immR = (elementSize - rotation) & (elementSize - 1); + + return true; + } + } +}
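`TryEncodeBitMask` computes the `(N, imms, immr)` triple used by AArch64 logical immediates: a run of `imms + 1` ones, rotated right by `immr`, replicated across the register in elements whose size is derived from `N:~imms`. A useful cross-check is the inverse transform from the architecture's `DecodeBitMasks` pseudocode; the sketch below (a hypothetical test helper, not part of the commit) should round-trip any value the encoder accepts:

```csharp
using System.Numerics;

static class BitMaskDecode
{
    // Inverse of TryEncodeBitMask, following the ARMv8 DecodeBitMasks pseudocode.
    public static ulong Decode(int immN, int immS, int immR)
    {
        // The element size is given by the position of the highest set bit of N:~imms.
        int len = 31 - BitOperations.LeadingZeroCount((uint)((immN << 6) | (~immS & 0x3f)));
        int esize = 1 << len;                 // 2, 4, 8, 16, 32 or 64
        int s = immS & (esize - 1);           // number of ones, minus one
        int r = immR & (esize - 1);           // right-rotate amount

        ulong emask = esize == 64 ? ulong.MaxValue : (1UL << esize) - 1;
        ulong welem = s + 1 == 64 ? ulong.MaxValue : (1UL << (s + 1)) - 1;
        ulong elem = r == 0 ? welem : ((welem >> r) | (welem << (esize - r))) & emask;

        // Replicate the element across all 64 bits.
        for (int i = esize; i < 64; i <<= 1)
        {
            elem |= elem << i;
        }

        return elem;
    }
}

// e.g. TryEncodeBitMask(0x00ff00ff00ff00ffUL, out int n, out int s, out int r)
// yields (0, 0b100111, 0), and Decode(0, 0b100111, 0) == 0x00ff00ff00ff00ff.
```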
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs new file mode 100644 index 00000000..0dd5355f --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenContext.cs @@ -0,0 +1,287 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.IntermediateRepresentation; +using Ryujinx.Common.Memory; +using System; +using System.Collections.Generic; +using System.IO; + +namespace ARMeilleure.CodeGen.Arm64 +{ + class CodeGenContext + { + private const int BccInstLength = 4; + private const int CbnzInstLength = 4; + private const int LdrLitInstLength = 4; + + private Stream _stream; + + public int StreamOffset => (int)_stream.Length; + + public AllocationResult AllocResult { get; } + + public Assembler Assembler { get; } + + public BasicBlock CurrBlock { get; private set; } + + public bool HasCall { get; } + + public int CallArgsRegionSize { get; } + public int FpLrSaveRegionSize { get; } + + private readonly Dictionary<BasicBlock, long> _visitedBlocks; + private readonly Dictionary<BasicBlock, List<(ArmCondition Condition, long BranchPos)>> _pendingBranches; + + private struct ConstantPoolEntry + { + public readonly int Offset; + public readonly Symbol Symbol; + public readonly List<(Operand, int)> LdrOffsets; + + public ConstantPoolEntry(int offset, Symbol symbol) + { + Offset = offset; + Symbol = symbol; + LdrOffsets = new List<(Operand, int)>(); + } + } + + private readonly Dictionary<ulong, ConstantPoolEntry> _constantPool; + + private bool _constantPoolWritten; + private long _constantPoolOffset; + + private ArmCondition _jNearCondition; + private Operand _jNearValue; + + private long _jNearPosition; + + private readonly bool _relocatable; + + public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable) + { + _stream = MemoryStreamManager.Shared.GetStream(); + + AllocResult = allocResult; + + Assembler = new Assembler(_stream); + + bool hasCall = maxCallArgs >= 0; + + HasCall = hasCall; + + if (maxCallArgs < 0) + { + maxCallArgs = 0; + } + + CallArgsRegionSize = maxCallArgs * 16; + FpLrSaveRegionSize = hasCall ? 
16 : 0; + + _visitedBlocks = new Dictionary<BasicBlock, long>(); + _pendingBranches = new Dictionary<BasicBlock, List<(ArmCondition, long)>>(); + _constantPool = new Dictionary<ulong, ConstantPoolEntry>(); + + _relocatable = relocatable; + } + + public void EnterBlock(BasicBlock block) + { + CurrBlock = block; + + long target = _stream.Position; + + if (_pendingBranches.TryGetValue(block, out var list)) + { + foreach (var tuple in list) + { + _stream.Seek(tuple.BranchPos, SeekOrigin.Begin); + WriteBranch(tuple.Condition, target); + } + + _stream.Seek(target, SeekOrigin.Begin); + _pendingBranches.Remove(block); + } + + _visitedBlocks.Add(block, target); + } + + public void JumpTo(BasicBlock target) + { + JumpTo(ArmCondition.Al, target); + } + + public void JumpTo(ArmCondition condition, BasicBlock target) + { + if (_visitedBlocks.TryGetValue(target, out long offset)) + { + WriteBranch(condition, offset); + } + else + { + if (!_pendingBranches.TryGetValue(target, out var list)) + { + list = new List<(ArmCondition, long)>(); + _pendingBranches.Add(target, list); + } + + list.Add((condition, _stream.Position)); + + _stream.Seek(BccInstLength, SeekOrigin.Current); + } + } + + private void WriteBranch(ArmCondition condition, long to) + { + int imm = checked((int)(to - _stream.Position)); + + if (condition != ArmCondition.Al) + { + Assembler.B(condition, imm); + } + else + { + Assembler.B(imm); + } + } + + public void JumpToNear(ArmCondition condition) + { + _jNearCondition = condition; + _jNearPosition = _stream.Position; + + _stream.Seek(BccInstLength, SeekOrigin.Current); + } + + public void JumpToNearIfNotZero(Operand value) + { + _jNearValue = value; + _jNearPosition = _stream.Position; + + _stream.Seek(CbnzInstLength, SeekOrigin.Current); + } + + public void JumpHere() + { + long currentPosition = _stream.Position; + long offset = currentPosition - _jNearPosition; + + _stream.Seek(_jNearPosition, SeekOrigin.Begin); + + if (_jNearValue != default) + { + Assembler.Cbnz(_jNearValue, checked((int)offset)); + _jNearValue = default; + } + else + { + Assembler.B(_jNearCondition, checked((int)offset)); + } + + _stream.Seek(currentPosition, SeekOrigin.Begin); + } + + public void ReserveRelocatableConstant(Operand rt, Symbol symbol, ulong value) + { + if (!_constantPool.TryGetValue(value, out ConstantPoolEntry cpe)) + { + cpe = new ConstantPoolEntry(_constantPool.Count * sizeof(ulong), symbol); + _constantPool.Add(value, cpe); + } + + cpe.LdrOffsets.Add((rt, (int)_stream.Position)); + _stream.Seek(LdrLitInstLength, SeekOrigin.Current); + } + + private long WriteConstantPool() + { + if (_constantPoolWritten) + { + return _constantPoolOffset; + } + + long constantPoolBaseOffset = _stream.Position; + + foreach (ulong value in _constantPool.Keys) + { + WriteUInt64(value); + } + + foreach (ConstantPoolEntry cpe in _constantPool.Values) + { + foreach ((Operand rt, int ldrOffset) in cpe.LdrOffsets) + { + _stream.Seek(ldrOffset, SeekOrigin.Begin); + + int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset)); + int pcRelativeOffset = absoluteOffset - ldrOffset; + + Assembler.LdrLit(rt, pcRelativeOffset); + } + } + + _stream.Seek(constantPoolBaseOffset + _constantPool.Count * sizeof(ulong), SeekOrigin.Begin); + + _constantPoolOffset = constantPoolBaseOffset; + _constantPoolWritten = true; + + return constantPoolBaseOffset; + } + + public (byte[], RelocInfo) GetCode() + { + long constantPoolBaseOffset = WriteConstantPool(); + + byte[] code = new byte[_stream.Length]; + + long originalPosition = 
_stream.Position; + + _stream.Seek(0, SeekOrigin.Begin); + _stream.Read(code, 0, code.Length); + _stream.Seek(originalPosition, SeekOrigin.Begin); + + RelocInfo relocInfo; + + if (_relocatable) + { + RelocEntry[] relocs = new RelocEntry[_constantPool.Count]; + + int index = 0; + + foreach (ConstantPoolEntry cpe in _constantPool.Values) + { + if (cpe.Symbol.Type != SymbolType.None) + { + int absoluteOffset = checked((int)(constantPoolBaseOffset + cpe.Offset)); + relocs[index++] = new RelocEntry(absoluteOffset, cpe.Symbol); + } + } + + if (index != relocs.Length) + { + Array.Resize(ref relocs, index); + } + + relocInfo = new RelocInfo(relocs); + } + else + { + relocInfo = new RelocInfo(Array.Empty<RelocEntry>()); + } + + return (code, relocInfo); + } + + private void WriteUInt64(ulong value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + _stream.WriteByte((byte)(value >> 32)); + _stream.WriteByte((byte)(value >> 40)); + _stream.WriteByte((byte)(value >> 48)); + _stream.WriteByte((byte)(value >> 56)); + } + } +}
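`JumpTo` and `EnterBlock` above implement single-pass forward-branch fixup: a branch to a block that has not been emitted yet reserves one 4-byte instruction slot and records its position in `_pendingBranches`; when the target block is finally entered, the stream is rewound and each reserved slot is patched with the now-known offset (the constant pool applies the same record-then-patch idea to `LdrLit`). A condensed sketch of the pattern, with simplified, hypothetical types:

```csharp
using System;
using System.Collections.Generic;
using System.IO;

// Toy version of _visitedBlocks/_pendingBranches: int block ids instead of
// BasicBlock, and a callback instead of Assembler.B. Illustrative only.
class ForwardBranchFixup
{
    private readonly Dictionary<int, long> _visited = new();
    private readonly Dictionary<int, List<long>> _pending = new();

    public void Branch(Stream stream, int targetBlock, Action<long> encodeBranch)
    {
        if (_visited.TryGetValue(targetBlock, out long target))
        {
            encodeBranch(target - stream.Position); // backward branch: offset known now
        }
        else
        {
            if (!_pending.TryGetValue(targetBlock, out var list))
            {
                _pending[targetBlock] = list = new List<long>();
            }

            list.Add(stream.Position);
            stream.Seek(4, SeekOrigin.Current); // reserve one A64 instruction slot
        }
    }

    public void EnterBlock(Stream stream, int block, Action<long> encodeBranch)
    {
        long target = stream.Position;

        if (_pending.Remove(block, out var list))
        {
            foreach (long pos in list)
            {
                stream.Seek(pos, SeekOrigin.Begin);
                encodeBranch(target - pos); // patch the reserved slot
            }

            stream.Seek(target, SeekOrigin.Begin);
        }

        _visited[block] = target;
    }
}
```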
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs new file mode 100644 index 00000000..fc4fa976 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/CodeGenerator.cs @@ -0,0 +1,1580 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Common; +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Numerics; + +using static ARMeilleure.IntermediateRepresentation.Operand; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class CodeGenerator + { + private const int DWordScale = 3; + + private const int RegistersCount = 32; + + private const int FpRegister = 29; + private const int LrRegister = 30; + private const int SpRegister = 31; + private const int ZrRegister = 31; + + private enum AccessSize + { + Byte, + Hword, + Auto + } + + private static Action<CodeGenContext, Operation>[] _instTable; + + static CodeGenerator() + { + _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))]; + + Add(Instruction.Add, GenerateAdd); + Add(Instruction.BitwiseAnd, GenerateBitwiseAnd); + Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr); + Add(Instruction.BitwiseNot, GenerateBitwiseNot); + Add(Instruction.BitwiseOr, GenerateBitwiseOr); + Add(Instruction.BranchIf, GenerateBranchIf); + Add(Instruction.ByteSwap, GenerateByteSwap); + Add(Instruction.Call, GenerateCall); + //Add(Instruction.Clobber, GenerateClobber); + Add(Instruction.Compare, GenerateCompare); + Add(Instruction.CompareAndSwap, GenerateCompareAndSwap); + Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16); + Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8); + Add(Instruction.ConditionalSelect, GenerateConditionalSelect); + Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32); + Add(Instruction.ConvertToFP, GenerateConvertToFP); + Add(Instruction.ConvertToFPUI, GenerateConvertToFPUI); + Add(Instruction.Copy, GenerateCopy); + Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros); + Add(Instruction.Divide, GenerateDivide); + Add(Instruction.DivideUI, GenerateDivideUI); + Add(Instruction.Fill, GenerateFill); + Add(Instruction.Load, GenerateLoad); + Add(Instruction.Load16, GenerateLoad16); + Add(Instruction.Load8, GenerateLoad8); + Add(Instruction.MemoryBarrier, GenerateMemoryBarrier); + Add(Instruction.Multiply, GenerateMultiply); + Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI); + Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI); + Add(Instruction.Negate, GenerateNegate); + Add(Instruction.Return, GenerateReturn); + Add(Instruction.RotateRight, GenerateRotateRight); + Add(Instruction.ShiftLeft, GenerateShiftLeft); + Add(Instruction.ShiftRightSI, GenerateShiftRightSI); + Add(Instruction.ShiftRightUI, GenerateShiftRightUI); + Add(Instruction.SignExtend16, GenerateSignExtend16); + Add(Instruction.SignExtend32, GenerateSignExtend32); + Add(Instruction.SignExtend8, GenerateSignExtend8); + Add(Instruction.Spill, GenerateSpill); + Add(Instruction.SpillArg, GenerateSpillArg); + Add(Instruction.StackAlloc, GenerateStackAlloc); + Add(Instruction.Store, GenerateStore); + Add(Instruction.Store16, GenerateStore16); + 
Add(Instruction.Store8, GenerateStore8); + Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.Tailcall, GenerateTailcall); + Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar); + Add(Instruction.VectorExtract, GenerateVectorExtract); + Add(Instruction.VectorExtract16, GenerateVectorExtract16); + Add(Instruction.VectorExtract8, GenerateVectorExtract8); + Add(Instruction.VectorInsert, GenerateVectorInsert); + Add(Instruction.VectorInsert16, GenerateVectorInsert16); + Add(Instruction.VectorInsert8, GenerateVectorInsert8); + Add(Instruction.VectorOne, GenerateVectorOne); + Add(Instruction.VectorZero, GenerateVectorZero); + Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64); + Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96); + Add(Instruction.ZeroExtend16, GenerateZeroExtend16); + Add(Instruction.ZeroExtend32, GenerateZeroExtend32); + Add(Instruction.ZeroExtend8, GenerateZeroExtend8); + + static void Add(Instruction inst, Action<CodeGenContext, Operation> func) + { + _instTable[(int)inst] = func; + } + } + + public static CompiledFunction Generate(CompilerContext cctx) + { + ControlFlowGraph cfg = cctx.Cfg; + + Logger.StartPass(PassName.Optimization); + + if (cctx.Options.HasFlag(CompilerOptions.Optimize)) + { + if (cctx.Options.HasFlag(CompilerOptions.SsaForm)) + { + Optimizer.RunPass(cfg); + } + + BlockPlacement.RunPass(cfg); + } + + Arm64Optimizer.RunPass(cfg); + + Logger.EndPass(PassName.Optimization, cfg); + + Logger.StartPass(PassName.PreAllocation); + + StackAllocator stackAlloc = new(); + + PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs); + + Logger.EndPass(PassName.PreAllocation, cfg); + + Logger.StartPass(PassName.RegisterAllocation); + + if (cctx.Options.HasFlag(CompilerOptions.SsaForm)) + { + Ssa.Deconstruct(cfg); + } + + IRegisterAllocator regAlloc; + + if (cctx.Options.HasFlag(CompilerOptions.Lsra)) + { + regAlloc = new LinearScanAllocator(); + } + else + { + regAlloc = new HybridAllocator(); + } + + RegisterMasks regMasks = new( + CallingConvention.GetIntAvailableRegisters(), + CallingConvention.GetVecAvailableRegisters(), + CallingConvention.GetIntCallerSavedRegisters(), + CallingConvention.GetVecCallerSavedRegisters(), + CallingConvention.GetIntCalleeSavedRegisters(), + CallingConvention.GetVecCalleeSavedRegisters(), + RegistersCount); + + AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks); + + Logger.EndPass(PassName.RegisterAllocation, cfg); + + Logger.StartPass(PassName.CodeGeneration); + + //Console.Error.WriteLine(IRDumper.GetDump(cfg)); + + bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0; + + CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable); + + UnwindInfo unwindInfo = WritePrologue(context); + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + context.EnterBlock(block); + + for (Operation node = block.Operations.First; node != default;) + { + node = GenerateOperation(context, node); + } + + if (block.SuccessorsCount == 0) + { + // The only blocks which can have 0 successors are exit blocks. 
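+                    // Both tail calls and returns leave the function entirely, so no
+                    // terminating branch needs to be emitted for these blocks.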
+ Operation last = block.Operations.Last; + + Debug.Assert(last.Instruction == Instruction.Tailcall || + last.Instruction == Instruction.Return); + } + else + { + BasicBlock succ = block.GetSuccessor(0); + + if (succ != block.ListNext) + { + context.JumpTo(succ); + } + } + } + + (byte[] code, RelocInfo relocInfo) = context.GetCode(); + + Logger.EndPass(PassName.CodeGeneration); + + return new CompiledFunction(code, unwindInfo, relocInfo); + } + + private static Operation GenerateOperation(CodeGenContext context, Operation operation) + { + if (operation.Instruction == Instruction.Extended) + { + CodeGeneratorIntrinsic.GenerateOperation(context, operation); + } + else + { + if (IsLoadOrStore(operation) && + operation.ListNext != default && + operation.ListNext.Instruction == operation.Instruction && + TryPairMemoryOp(context, operation, operation.ListNext)) + { + // Skip next operation if we managed to pair them. + return operation.ListNext.ListNext; + } + + Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction]; + + if (func != null) + { + func(context, operation); + } + else + { + throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\"."); + } + } + + return operation.ListNext; + } + + private static void GenerateAdd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + // ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Add(dest, src1, src2); + } + else + { + context.Assembler.FaddScalar(dest, src1, src2); + } + } + + private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.And(dest, src1, src2); + } + + private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Eor(dest, src1, src2); + } + else + { + context.Assembler.EorVector(dest, src1, src2); + } + } + + private static void GenerateBitwiseNot(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Mvn(dest, source); + } + + private static void GenerateBitwiseOr(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Orr(dest, src1, src2); + } + + private static void GenerateBranchIf(CodeGenContext context, Operation operation) + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var cond = ((Comparison)comp.AsInt32()).ToArmCondition(); + + GenerateCompareCommon(context, operation); + + context.JumpTo(cond, context.CurrBlock.GetSuccessor(1)); + } + + private static void GenerateByteSwap(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = 
operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Rev(dest, source); + } + + private static void GenerateCall(CodeGenContext context, Operation operation) + { + context.Assembler.Blr(operation.GetSource(0)); + } + + private static void GenerateCompare(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand comp = operation.GetSource(2); + + Debug.Assert(dest.Type == OperandType.I32); + Debug.Assert(comp.Kind == OperandKind.Constant); + + var cond = ((Comparison)comp.AsInt32()).ToArmCondition(); + + GenerateCompareCommon(context, operation); + + context.Assembler.Cset(dest, cond); + } + + private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation) + { + if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3. + { + Operand actualLow = operation.GetDestination(0); + Operand actualHigh = operation.GetDestination(1); + Operand temp0 = operation.GetDestination(2); + Operand temp1 = operation.GetDestination(3); + Operand address = operation.GetSource(0); + Operand expectedLow = operation.GetSource(1); + Operand expectedHigh = operation.GetSource(2); + Operand desiredLow = operation.GetSource(3); + Operand desiredHigh = operation.GetSource(4); + + GenerateAtomicDcas( + context, + address, + expectedLow, + expectedHigh, + desiredLow, + desiredHigh, + actualLow, + actualHigh, + temp0, + temp1); + } + else + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Auto); + } + } + + private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation) + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Hword); + } + + private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation) + { + Operand actual = operation.GetDestination(0); + Operand result = operation.GetDestination(1); + Operand address = operation.GetSource(0); + Operand expected = operation.GetSource(1); + Operand desired = operation.GetSource(2); + + GenerateAtomicCas(context, address, expected, desired, actual, result, AccessSize.Byte); + } + + private static void GenerateCompareCommon(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(src1, src2); + + Debug.Assert(src1.Type.IsInteger()); + + context.Assembler.Cmp(src1, src2); + } + + private static void GenerateConditionalSelect(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src2, src3); + + Debug.Assert(dest.Type.IsInteger()); + Debug.Assert(src1.Type == OperandType.I32); + + context.Assembler.Cmp (src1, Const(src1.Type, 0)); + context.Assembler.Csel(dest, src2, src3, ArmCondition.Ne); + } + + private static void GenerateConvertI64ToI32(CodeGenContext 
context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64); + + context.Assembler.Mov(dest, Register(source, OperandType.I32)); + } + + private static void GenerateConvertToFP(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + Debug.Assert(dest.Type != source.Type); + Debug.Assert(source.Type != OperandType.V128); + + if (source.Type.IsInteger()) + { + context.Assembler.ScvtfScalar(dest, source); + } + else + { + context.Assembler.FcvtScalar(dest, source); + } + } + + private static void GenerateConvertToFPUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + Debug.Assert(dest.Type != source.Type); + Debug.Assert(source.Type.IsInteger()); + + context.Assembler.UcvtfScalar(dest, source); + } + + private static void GenerateCopy(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant); + + // Moves to the same register are useless. + if (dest.Kind == source.Kind && dest.Value == source.Value) + { + return; + } + + if (dest.Kind == OperandKind.Register && source.Kind == OperandKind.Constant) + { + if (source.Relocatable) + { + context.ReserveRelocatableConstant(dest, source.Symbol, source.Value); + } + else + { + GenerateConstantCopy(context, dest, source.Value); + } + } + else + { + context.Assembler.Mov(dest, source); + } + } + + private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Clz(dest, source); + } + + private static void GenerateDivide(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + ValidateBinOp(dest, dividend, divisor); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sdiv(dest, dividend, divisor); + } + else + { + context.Assembler.FdivScalar(dest, dividend, divisor); + } + } + + private static void GenerateDivideUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + ValidateBinOp(dest, dividend, divisor); + + context.Assembler.Udiv(dest, dividend, divisor); + } + + private static void GenerateLoad(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); + + context.Assembler.Ldr(value, address); + } + + private static void GenerateLoad16(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.LdrhRiUn(value, address, 0); + } + + private static void GenerateLoad8(CodeGenContext context, Operation operation) + { + Operand value = 
operation.Destination; + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.LdrbRiUn(value, address, 0); + } + + private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation) + { + context.Assembler.Dmb(0xf); + } + + private static void GenerateMultiply(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Mul(dest, src1, src2); + } + else + { + context.Assembler.FmulScalar(dest, src1, src2); + } + } + + private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + Debug.Assert(dest.Type == OperandType.I64); + + context.Assembler.Smulh(dest, src1, src2); + } + + private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1, src2); + + Debug.Assert(dest.Type == OperandType.I64); + + context.Assembler.Umulh(dest, src1, src2); + } + + private static void GenerateNegate(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + if (dest.Type.IsInteger()) + { + context.Assembler.Neg(dest, source); + } + else + { + context.Assembler.FnegScalar(dest, source); + } + } + + private static void GenerateLoad(CodeGenContext context, Operand value, Operand address, int offset) + { + if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type)) + { + context.Assembler.LdrRiUn(value, address, offset); + } + else if (CodeGenCommon.ConstFitsOnSImm9(offset)) + { + context.Assembler.Ldur(value, address, offset); + } + else + { + Operand tempAddress = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempAddress, (ulong)offset); + context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input. 
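+                // (The extended-register form of ADD is used precisely because it accepts
+                // SP in its first source position; the shifted-register form cannot encode SP.)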
+ context.Assembler.LdrRiUn(value, tempAddress, 0); + } + } + + private static void GenerateReturn(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Ret(Register(LrRegister)); + } + + private static void GenerateRotateRight(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Ror(dest, src1, src2); + } + + private static void GenerateShiftLeft(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Lsl(dest, src1, src2); + } + + private static void GenerateShiftRightSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Asr(dest, src1, src2); + } + + private static void GenerateShiftRightUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Lsr(dest, src1, src2); + } + + private static void GenerateSignExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxth(dest, source); + } + + private static void GenerateSignExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxtw(dest, source); + } + + private static void GenerateSignExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Sxtb(dest, source); + } + + private static void GenerateFill(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize; + + GenerateLoad(context, dest, Register(SpRegister), offs); + } + + private static void GenerateStore(CodeGenContext context, Operand value, Operand address, int offset) + { + if (CodeGenCommon.ConstFitsOnUImm12(offset, value.Type)) + { + context.Assembler.StrRiUn(value, address, offset); + } + else if (CodeGenCommon.ConstFitsOnSImm9(offset)) + { + context.Assembler.Stur(value, address, offset); + } + else + { + Operand tempAddress = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempAddress, (ulong)offset); + context.Assembler.Add(tempAddress, address, tempAddress, ArmExtensionType.Uxtx); // Address might be SP and must be the first input. 
+ context.Assembler.StrRiUn(value, tempAddress, 0); + } + } + + private static void GenerateSpill(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, context.CallArgsRegionSize + context.FpLrSaveRegionSize); + } + + private static void GenerateSpillArg(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, 0); + } + + private static void GenerateStackAlloc(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize + context.FpLrSaveRegionSize; + + context.Assembler.Add(dest, Register(SpRegister), Const(dest.Type, offs)); + } + + private static void GenerateStore(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + context.Assembler.Str(value, address); + } + + private static void GenerateStore16(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.StrhRiUn(value, address, 0); + } + + private static void GenerateStore8(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = operation.GetSource(0); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.StrbRiUn(value, address, 0); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset) + { + Operand offset = operation.GetSource(0); + Operand source = operation.GetSource(1); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + baseOffset; + + GenerateStore(context, source, Register(SpRegister), offs); + } + + private static void GenerateSubtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + // ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sub(dest, src1, src2); + } + else + { + context.Assembler.FsubScalar(dest, src1, src2); + } + } + + private static void GenerateTailcall(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Br(operation.GetSource(0)); + } + + private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + if (dest != default) + { + Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger()); + + OperandType destType = source.Type == OperandType.I64 ? OperandType.FP64 : OperandType.FP32; + + context.Assembler.Fmov(Register(dest, destType), source, topHalf: false); + } + } + + private static void GenerateVectorExtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes()); + + if (dest.Type.IsInteger()) + { + context.Assembler.Umov(dest, src1, index, dest.Type == OperandType.I64 ? 
3 : 2); + } + else + { + context.Assembler.DupScalar(dest, src1, index, dest.Type == OperandType.FP64 ? 3 : 2); + } + } + + private static void GenerateVectorExtract16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 8); + + context.Assembler.Umov(dest, src1, index, 1); + } + + private static void GenerateVectorExtract8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; // Value + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 16); + + context.Assembler.Umov(dest, src1, index, 0); + } + + private static void GenerateVectorInsert(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + if (src2.Type.IsInteger()) + { + context.Assembler.Ins(dest, src2, index, src2.Type == OperandType.I64 ? 3 : 2); + } + else + { + context.Assembler.Ins(dest, src2, 0, index, src2.Type == OperandType.FP64 ? 3 : 2); + } + } + + private static void GenerateVectorInsert16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Ins(dest, src2, index, 1); + } + + private static void GenerateVectorInsert8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); // Vector + Operand src2 = operation.GetSource(1); // Value + Operand src3 = operation.GetSource(2); // Index + + EnsureSameReg(dest, src1); + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Ins(dest, src2, index, 0); + } + + private static void GenerateVectorOne(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.CmeqVector(dest, dest, dest, 2); + } + + private static void GenerateVectorZero(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.EorVector(dest, dest, dest); + } + + private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + context.Assembler.Fmov(Register(dest, OperandType.FP64), Register(source, OperandType.FP64)); + } + 
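+        // Note: on AArch64, a scalar FMOV that writes an S or D register zeroes the rest
+        // of the 128-bit vector register, so these "zero upper" operations reduce to a
+        // plain FP32/FP64 register copy.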
+ private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + context.Assembler.Fmov(Register(dest, OperandType.FP32), Register(source, OperandType.FP32)); + } + + private static void GenerateZeroExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Uxth(dest, source); + } + + private static void GenerateZeroExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + // We can eliminate the move if source is already 32-bit and the registers are the same. + if (dest.Value == source.Value && source.Type == OperandType.I32) + { + return; + } + + context.Assembler.Mov(Register(dest.GetRegister().Index, OperandType.I32), source); + } + + private static void GenerateZeroExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Uxtb(dest, source); + } + + private static UnwindInfo WritePrologue(CodeGenContext context) + { + List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>(); + + Operand rsp = Register(SpRegister); + + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask); + int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask); + + int calleeSaveRegionSize = Align16(intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8); + + int offset = 0; + + WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64); + WritePrologueCalleeSavesPreIndexed(context, pushEntries, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64); + + int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize); + int outArgsSize = context.CallArgsRegionSize; + + if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale)) + { + if (context.HasCall) + { + context.Assembler.StpRiPre(Register(FpRegister), Register(LrRegister), rsp, -localSize); + context.Assembler.MovSp(Register(FpRegister), rsp); + } + + if (outArgsSize != 0) + { + context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, outArgsSize)); + } + } + else + { + int frameSize = localSize + outArgsSize; + if (frameSize != 0) + { + if (CodeGenCommon.ConstFitsOnUImm12(frameSize)) + { + context.Assembler.Sub(rsp, rsp, Const(OperandType.I64, frameSize)); + } + else + { + Operand tempSize = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempSize, (ulong)frameSize); + context.Assembler.Sub(rsp, rsp, tempSize, ArmExtensionType.Uxtx); + } + } + + context.Assembler.StpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize); + + if (outArgsSize != 0) + { + context.Assembler.Add(Register(FpRegister), Register(SpRegister), Const(OperandType.I64, outArgsSize)); + } + else + { + 
context.Assembler.MovSp(Register(FpRegister), Register(SpRegister)); + } + } + + return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset); + } + + private static void WritePrologueCalleeSavesPreIndexed( + CodeGenContext context, + List<UnwindPushEntry> pushEntries, + ref int mask, + ref int offset, + int calleeSaveRegionSize, + OperandType type) + { + if ((BitOperations.PopCount((uint)mask) & 1) != 0) + { + int reg = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg)); + + mask &= ~(1 << reg); + + if (offset != 0) + { + context.Assembler.StrRiUn(Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.StrRiPre(Register(reg, type), Register(SpRegister), -calleeSaveRegionSize); + } + + offset += type.GetSizeInBytes(); + } + + while (mask != 0) + { + int reg = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg)); + + mask &= ~(1 << reg); + + int reg2 = BitOperations.TrailingZeroCount(mask); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: reg2)); + + mask &= ~(1 << reg2); + + if (offset != 0) + { + context.Assembler.StpRiUn(Register(reg, type), Register(reg2, type), Register(SpRegister), offset); + } + else + { + context.Assembler.StpRiPre(Register(reg, type), Register(reg2, type), Register(SpRegister), -calleeSaveRegionSize); + } + + offset += type.GetSizeInBytes() * 2; + } + } + + private static void WriteEpilogue(CodeGenContext context) + { + Operand rsp = Register(SpRegister); + + int localSize = Align16(context.AllocResult.SpillRegionSize + context.FpLrSaveRegionSize); + int outArgsSize = context.CallArgsRegionSize; + + if (CodeGenCommon.ConstFitsOnSImm7(localSize, DWordScale)) + { + if (outArgsSize != 0) + { + context.Assembler.Add(rsp, rsp, Const(OperandType.I64, outArgsSize)); + } + + if (context.HasCall) + { + context.Assembler.LdpRiPost(Register(FpRegister), Register(LrRegister), rsp, localSize); + } + } + else + { + if (context.HasCall) + { + context.Assembler.LdpRiUn(Register(FpRegister), Register(LrRegister), rsp, outArgsSize); + } + + int frameSize = localSize + outArgsSize; + if (frameSize != 0) + { + if (CodeGenCommon.ConstFitsOnUImm12(frameSize)) + { + context.Assembler.Add(rsp, rsp, Const(OperandType.I64, frameSize)); + } + else + { + Operand tempSize = Register(CodeGenCommon.ReservedRegister); + GenerateConstantCopy(context, tempSize, (ulong)frameSize); + context.Assembler.Add(rsp, rsp, tempSize, ArmExtensionType.Uxtx); + } + } + } + + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetFpCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + int intCalleeSavedRegsCount = BitOperations.PopCount((uint)intMask); + int vecCalleeSavedRegsCount = BitOperations.PopCount((uint)vecMask); + + int offset = intCalleeSavedRegsCount * 8 + vecCalleeSavedRegsCount * 8; + int calleeSaveRegionSize = Align16(offset); + + WriteEpilogueCalleeSavesPostIndexed(context, ref vecMask, ref offset, calleeSaveRegionSize, OperandType.FP64); + WriteEpilogueCalleeSavesPostIndexed(context, ref intMask, ref offset, calleeSaveRegionSize, OperandType.I64); + } + + private static void WriteEpilogueCalleeSavesPostIndexed( + CodeGenContext context, + ref int mask, + ref int offset, + int calleeSaveRegionSize, + OperandType type) + { + while (mask != 0) + { 
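+                // Registers are restored pairwise from the highest index down, mirroring
+                // the prologue's store order; the access that reaches offset 0 switches to
+                // a post-indexed form so it also frees the callee-save region.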
+ int reg = BitUtils.HighestBitSet(mask); + + mask &= ~(1 << reg); + + if (mask != 0) + { + int reg2 = BitUtils.HighestBitSet(mask); + + mask &= ~(1 << reg2); + + offset -= type.GetSizeInBytes() * 2; + + if (offset != 0) + { + context.Assembler.LdpRiUn(Register(reg2, type), Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.LdpRiPost(Register(reg2, type), Register(reg, type), Register(SpRegister), calleeSaveRegionSize); + } + } + else + { + offset -= type.GetSizeInBytes(); + + if (offset != 0) + { + context.Assembler.LdrRiUn(Register(reg, type), Register(SpRegister), offset); + } + else + { + context.Assembler.LdrRiPost(Register(reg, type), Register(SpRegister), calleeSaveRegionSize); + } + } + } + } + + private static void GenerateConstantCopy(CodeGenContext context, Operand dest, ulong value) + { + if (value == 0) + { + context.Assembler.Mov(dest, Register(ZrRegister, dest.Type)); + } + else if (CodeGenCommon.TryEncodeBitMask(dest.Type, value, out _, out _, out _)) + { + context.Assembler.Orr(dest, Register(ZrRegister, dest.Type), Const(dest.Type, (long)value)); + } + else + { + int hw = 0; + bool first = true; + + while (value != 0) + { + int valueLow = (ushort)value; + if (valueLow != 0) + { + if (first) + { + context.Assembler.Movz(dest, valueLow, hw); + first = false; + } + else + { + context.Assembler.Movk(dest, valueLow, hw); + } + } + + hw++; + value >>= 16; + } + } + } + + private static void GenerateAtomicCas( + CodeGenContext context, + Operand address, + Operand expected, + Operand desired, + Operand actual, + Operand result, + AccessSize accessSize) + { + int startOffset = context.StreamOffset; + + switch (accessSize) + { + case AccessSize.Byte: + context.Assembler.Ldaxrb(actual, address); + break; + case AccessSize.Hword: + context.Assembler.Ldaxrh(actual, address); + break; + default: + context.Assembler.Ldaxr(actual, address); + break; + } + + context.Assembler.Cmp(actual, expected); + + context.JumpToNear(ArmCondition.Ne); + + switch (accessSize) + { + case AccessSize.Byte: + context.Assembler.Stlxrb(desired, address, result); + break; + case AccessSize.Hword: + context.Assembler.Stlxrh(desired, address, result); + break; + default: + context.Assembler.Stlxr(desired, address, result); + break; + } + + context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed. + + context.JumpHere(); + + context.Assembler.Clrex(); + } + + private static void GenerateAtomicDcas( + CodeGenContext context, + Operand address, + Operand expectedLow, + Operand expectedHigh, + Operand desiredLow, + Operand desiredHigh, + Operand actualLow, + Operand actualHigh, + Operand temp0, + Operand temp1) + { + int startOffset = context.StreamOffset; + + context.Assembler.Ldaxp(actualLow, actualHigh, address); + context.Assembler.Eor(temp0, actualHigh, expectedHigh); + context.Assembler.Eor(temp1, actualLow, expectedLow); + context.Assembler.Orr(temp0, temp1, temp0); + + context.JumpToNearIfNotZero(temp0); + + Operand result = Register(temp0, OperandType.I32); + + context.Assembler.Stlxp(desiredLow, desiredHigh, address, result); + context.Assembler.Cbnz(result, startOffset - context.StreamOffset); // Retry if store failed. 
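+            // STLXP writes 0 to the status register on success and 1 if exclusivity was
+            // lost, so the CBNZ above loops back to the LDAXP to retry.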
+
+            context.JumpHere();
+
+            context.Assembler.Clrex();
+        }
+
+        private static bool TryPairMemoryOp(CodeGenContext context, Operation currentOp, Operation nextOp)
+        {
+            if (!TryGetMemOpBaseAndOffset(currentOp, out Operand op1Base, out int op1Offset))
+            {
+                return false;
+            }
+
+            if (!TryGetMemOpBaseAndOffset(nextOp, out Operand op2Base, out int op2Offset))
+            {
+                return false;
+            }
+
+            if (op1Base != op2Base)
+            {
+                return false;
+            }
+
+            OperandType valueType = GetMemOpValueType(currentOp);
+
+            if (valueType != GetMemOpValueType(nextOp) || op1Offset + valueType.GetSizeInBytes() != op2Offset)
+            {
+                return false;
+            }
+
+            if (!CodeGenCommon.ConstFitsOnSImm7(op1Offset, valueType.GetSizeInBytesLog2()))
+            {
+                return false;
+            }
+
+            if (currentOp.Instruction == Instruction.Load)
+            {
+                context.Assembler.LdpRiUn(currentOp.Destination, nextOp.Destination, op1Base, op1Offset);
+            }
+            else if (currentOp.Instruction == Instruction.Store)
+            {
+                context.Assembler.StpRiUn(currentOp.GetSource(1), nextOp.GetSource(1), op1Base, op1Offset);
+            }
+            else
+            {
+                return false;
+            }
+
+            return true;
+        }
+
+        private static bool IsLoadOrStore(Operation operation)
+        {
+            return operation.Instruction == Instruction.Load || operation.Instruction == Instruction.Store;
+        }
+
+        private static OperandType GetMemOpValueType(Operation operation)
+        {
+            if (operation.Destination != default)
+            {
+                return operation.Destination.Type;
+            }
+
+            return operation.GetSource(1).Type;
+        }
+
+        private static bool TryGetMemOpBaseAndOffset(Operation operation, out Operand baseAddress, out int offset)
+        {
+            baseAddress = default;
+            offset = 0;
+            Operand address = operation.GetSource(0);
+
+            if (address.Kind != OperandKind.Memory)
+            {
+                return false;
+            }
+
+            MemoryOperand memOp = address.GetMemory();
+            Operand baseOp = memOp.BaseAddress;
+
+            if (baseOp == default)
+            {
+                baseOp = memOp.Index;
+
+                if (baseOp == default || memOp.Scale != Multiplier.x1)
+                {
+                    return false;
+                }
+            }
+            else if (memOp.Index != default)
+            {
+                return false;
+            }
+
+            baseAddress = baseOp;
+            offset = memOp.Displacement;
+
+            return true;
+        }
+
+        private static Operand Register(Operand operand, OperandType type = OperandType.I64)
+        {
+            return Register(operand.GetRegister().Index, type);
+        }
+
+        private static Operand Register(int register, OperandType type = OperandType.I64)
+        {
+            return Factory.Register(register, RegisterType.Integer, type);
+        }
+
+        private static int Align16(int value)
+        {
+            return (value + 0xf) & ~0xf;
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateUnOp(Operand dest, Operand source)
+        {
+            // Destination and source aren't forced to be equal
+            // EnsureSameReg (dest, source);
+            EnsureSameType(dest, source);
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateBinOp(Operand dest, Operand src1, Operand src2)
+        {
+            // Destination and source aren't forced to be equal
+            // EnsureSameReg (dest, src1);
+            EnsureSameType(dest, src1, src2);
+        }
+
+        [Conditional("DEBUG")]
+        private static void ValidateShift(Operand dest, Operand src1, Operand src2)
+        {
+            // Destination and source aren't forced to be equal
+            // EnsureSameReg (dest, src1);
+            EnsureSameType(dest, src1);
+
+            Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32);
+        }
+
+        private static void EnsureSameReg(Operand op1, Operand op2)
+        {
+            Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory);
+            Debug.Assert(op1.Kind == op2.Kind);
+            Debug.Assert(op1.Value == op2.Value);
+        }
+
+        private static void EnsureSameType(Operand op1, Operand op2)
+        {
Debug.Assert(op1.Type == op2.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + Debug.Assert(op1.Type == op4.Type); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs new file mode 100644 index 00000000..aaa00bb6 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/CodeGeneratorIntrinsic.cs @@ -0,0 +1,662 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Diagnostics; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class CodeGeneratorIntrinsic + { + public static void GenerateOperation(CodeGenContext context, Operation operation) + { + Intrinsic intrin = operation.Intrinsic; + + IntrinsicInfo info = IntrinsicTable.GetInfo(intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + switch (info.Type) + { + case IntrinsicType.ScalarUnary: + GenerateVectorUnary( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.ScalarUnaryByElem: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorUnaryByElem( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(1).AsInt32(), + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.ScalarBinary: + GenerateVectorBinary( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.ScalarBinaryFPByElem: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryFPByElem( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(2).AsInt32(), + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.ScalarBinaryRd: + GenerateVectorUnary( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1)); + break; + case IntrinsicType.ScalarBinaryShl: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShlImm( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.ScalarBinaryShr: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.ScalarFPCompare: + GenerateScalarFPCompare( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.ScalarFPConvFixed: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + 0, + ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.ScalarFPConvFixedGpr: + 
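+                    // The fractional-bit count arrives as a constant source and is folded
+                    // into the instruction's scale field (encoded as 64 - fBits) by
+                    // GenerateScalarFPConvGpr below.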
Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateScalarFPConvGpr( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.ScalarFPConvGpr: + GenerateScalarFPConvGpr( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.ScalarTernary: + GenerateScalarTernary( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + operation.GetSource(2), + operation.GetSource(0)); + break; + case IntrinsicType.ScalarTernaryFPRdByElem: + Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant); + + GenerateVectorBinaryFPByElem( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(3).AsInt32(), + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.ScalarTernaryShlRd: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryShlImm( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + (uint)operation.GetSource(2).AsInt32()); + break; + case IntrinsicType.ScalarTernaryShrRd: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + 0, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + (uint)operation.GetSource(2).AsInt32()); + break; + + case IntrinsicType.VectorUnary: + GenerateVectorUnary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.VectorUnaryByElem: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorUnaryByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(1).AsInt32(), + operation.Destination, + operation.GetSource(0)); + break; + case IntrinsicType.VectorBinary: + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryBitwise: + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryByElem: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + 
(uint)operation.GetSource(2).AsInt32(), + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryFPByElem: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryFPByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(2).AsInt32(), + operation.Destination, + operation.GetSource(0), + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryRd: + GenerateVectorUnary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1)); + break; + case IntrinsicType.VectorBinaryShl: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShlImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.VectorBinaryShr: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.VectorFPConvFixed: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + ((uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift) + 2u, + info.Inst, + operation.Destination, + operation.GetSource(0), + (uint)operation.GetSource(1).AsInt32()); + break; + case IntrinsicType.VectorInsertByElem: + Debug.Assert(operation.GetSource(1).Kind == OperandKind.Constant); + Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant); + + GenerateVectorInsertByElem( + context, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(3).AsInt32(), + (uint)operation.GetSource(1).AsInt32(), + operation.Destination, + operation.GetSource(2)); + break; + case IntrinsicType.VectorLookupTable: + Debug.Assert((uint)(operation.SourcesCount - 2) <= 3); + + for (int i = 1; i < operation.SourcesCount - 1; i++) + { + Register currReg = operation.GetSource(i).GetRegister(); + Register prevReg = operation.GetSource(i - 1).GetRegister(); + + Debug.Assert(prevReg.Index + 1 == currReg.Index && currReg.Type == RegisterType.Vector); + } + + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + info.Inst | ((uint)(operation.SourcesCount - 2) << 13), + operation.Destination, + operation.GetSource(0), + operation.GetSource(operation.SourcesCount - 1)); + break; + case IntrinsicType.VectorTernaryFPRdByElem: + Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant); + + GenerateVectorBinaryFPByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & 
Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(3).AsInt32(), + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.VectorTernaryRd: + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.VectorTernaryRdBitwise: + GenerateVectorBinary( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.VectorTernaryRdByElem: + Debug.Assert(operation.GetSource(3).Kind == OperandKind.Constant); + + GenerateVectorBinaryByElem( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + (uint)operation.GetSource(3).AsInt32(), + operation.Destination, + operation.GetSource(1), + operation.GetSource(2)); + break; + case IntrinsicType.VectorTernaryShlRd: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryShlImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + (uint)operation.GetSource(2).AsInt32()); + break; + case IntrinsicType.VectorTernaryShrRd: + Debug.Assert(operation.GetSource(2).Kind == OperandKind.Constant); + + GenerateVectorBinaryShrImm( + context, + (uint)(intrin & Intrinsic.Arm64VTypeMask) >> (int)Intrinsic.Arm64VTypeShift, + (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift, + info.Inst, + operation.Destination, + operation.GetSource(1), + (uint)operation.GetSource(2).AsInt32()); + break; + + case IntrinsicType.GetRegister: + context.Assembler.WriteInstruction(info.Inst, operation.Destination); + break; + case IntrinsicType.SetRegister: + context.Assembler.WriteInstruction(info.Inst, operation.GetSource(0)); + break; + + default: + throw new NotImplementedException(info.Type.ToString()); + } + } + + private static void GenerateScalarFPCompare( + CodeGenContext context, + uint sz, + uint instruction, + Operand dest, + Operand rn, + Operand rm) + { + instruction |= (sz << 22); + + if (rm.Kind == OperandKind.Constant && rm.Value == 0) + { + instruction |= 0b1000; + rm = rn; + } + + context.Assembler.WriteInstructionRm16NoRet(instruction, rn, rm); + context.Assembler.Mrs(dest, 1, 3, 4, 2, 0); + } + + private static void GenerateScalarFPConvGpr( + CodeGenContext context, + uint sz, + uint instruction, + Operand rd, + Operand rn) + { + instruction |= (sz << 22); + + if (rd.Type.IsInteger()) + { + context.Assembler.WriteInstructionAuto(instruction, rd, rn); + } + else + { + if (rn.Type == OperandType.I64) + { + instruction |= Assembler.SfFlag; + } + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + } + + private static void GenerateScalarFPConvGpr( + CodeGenContext context, + uint sz, + uint instruction, + Operand rd, + Operand rn, + uint fBits) + { + Debug.Assert(fBits <= 64); + + instruction |= (sz << 22); + instruction |= (64 - fBits) << 10; + + if (rd.Type.IsInteger()) + { + Debug.Assert(rd.Type != 
OperandType.I32 || fBits <= 32); + + context.Assembler.WriteInstructionAuto(instruction, rd, rn); + } + else + { + if (rn.Type == OperandType.I64) + { + instruction |= Assembler.SfFlag; + } + else + { + Debug.Assert(fBits <= 32); + } + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + } + + private static void GenerateScalarTernary( + CodeGenContext context, + uint sz, + uint instruction, + Operand rd, + Operand rn, + Operand rm, + Operand ra) + { + instruction |= (sz << 22); + + context.Assembler.WriteInstruction(instruction, rd, rn, rm, ra); + } + + private static void GenerateVectorUnary( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + Operand rd, + Operand rn) + { + instruction |= (q << 30) | (sz << 22); + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + private static void GenerateVectorUnaryByElem( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + uint srcIndex, + Operand rd, + Operand rn) + { + uint imm5 = (srcIndex << ((int)sz + 1)) | (1u << (int)sz); + + instruction |= (q << 30) | (imm5 << 16); + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + private static void GenerateVectorBinary( + CodeGenContext context, + uint q, + uint instruction, + Operand rd, + Operand rn, + Operand rm) + { + instruction |= (q << 30); + + context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm); + } + + private static void GenerateVectorBinary( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + Operand rd, + Operand rn, + Operand rm) + { + instruction |= (q << 30) | (sz << 22); + + context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm); + } + + private static void GenerateVectorBinaryByElem( + CodeGenContext context, + uint q, + uint size, + uint instruction, + uint srcIndex, + Operand rd, + Operand rn, + Operand rm) + { + instruction |= (q << 30) | (size << 22); + + if (size == 2) + { + instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10); + } + else + { + instruction |= ((srcIndex & 3) << 20) | ((srcIndex & 4) << 9); + } + + context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm); + } + + private static void GenerateVectorBinaryFPByElem( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + uint srcIndex, + Operand rd, + Operand rn, + Operand rm) + { + instruction |= (q << 30) | (sz << 22); + + if (sz != 0) + { + instruction |= (srcIndex & 1) << 11; + } + else + { + instruction |= ((srcIndex & 1) << 21) | ((srcIndex & 2) << 10); + } + + context.Assembler.WriteInstructionRm16(instruction, rd, rn, rm); + } + + private static void GenerateVectorBinaryShlImm( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + Operand rd, + Operand rn, + uint shift) + { + instruction |= (q << 30); + + Debug.Assert(shift >= 0 && shift < (8u << (int)sz)); + + uint imm = (8u << (int)sz) | (shift & (0x3fu >> (int)(3 - sz))); + + instruction |= (imm << 16); + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + private static void GenerateVectorBinaryShrImm( + CodeGenContext context, + uint q, + uint sz, + uint instruction, + Operand rd, + Operand rn, + uint shift) + { + instruction |= (q << 30); + + Debug.Assert(shift > 0 && shift <= (8u << (int)sz)); + + uint imm = (8u << (int)sz) | ((8u << (int)sz) - shift); + + instruction |= (imm << 16); + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + + private static void GenerateVectorInsertByElem( + CodeGenContext context, + uint sz, + uint instruction, + uint srcIndex, + uint 
dstIndex, + Operand rd, + Operand rn) + { + uint imm4 = srcIndex << (int)sz; + uint imm5 = (dstIndex << ((int)sz + 1)) | (1u << (int)sz); + + instruction |= imm4 << 11; + instruction |= imm5 << 16; + + context.Assembler.WriteInstruction(instruction, rd, rn); + } + } +}
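Taken together, the helpers above OR the variable fields of an instruction into a fixed base opcode: the Q bit (128-bit form) lands at bit 30, the element size at bits 23:22, and the register indices in the low fields that the Assembler's Write* methods fill in. As a rough standalone sketch (not part of this diff), assuming the standard AArch64 SIMD field layout (Rm at bits 20:16, Rn at 9:5, Rd at 4:0) and taking the FADD (vector) base opcode from the IntrinsicTable further down:

    uint baseOpcode = 0x0e20d400u;  // Arm64FaddV base opcode, per IntrinsicTable
    uint q = 1, sz = 0;             // 128-bit operation on 32-bit lanes (.4S)
    uint rd = 0, rn = 1, rm = 2;    // destination V0, sources V1 and V2
    uint instr = baseOpcode | (q << 30) | (sz << 22) | (rm << 16) | (rn << 5) | rd;
    // instr == 0x4e22d420, i.e. FADD V0.4S, V1.4S, V2.4S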
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs b/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs new file mode 100644 index 00000000..99ff299e --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/HardwareCapabilities.cs @@ -0,0 +1,185 @@ +using System; +using System.Linq; +using System.Reflection; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Versioning; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static partial class HardwareCapabilities + { + static HardwareCapabilities() + { + if (!ArmBase.Arm64.IsSupported) + { + return; + } + + if (OperatingSystem.IsLinux()) + { + LinuxFeatureInfoHwCap = (LinuxFeatureFlagsHwCap)getauxval(AT_HWCAP); + LinuxFeatureInfoHwCap2 = (LinuxFeatureFlagsHwCap2)getauxval(AT_HWCAP2); + } + + if (OperatingSystem.IsMacOS()) + { + for (int i = 0; i < _sysctlNames.Length; i++) + { + if (CheckSysctlName(_sysctlNames[i])) + { + MacOsFeatureInfo |= (MacOsFeatureFlags)(1 << i); + } + } + } + } + +#region Linux + + private const ulong AT_HWCAP = 16; + private const ulong AT_HWCAP2 = 26; + + [LibraryImport("libc", SetLastError = true)] + private static partial ulong getauxval(ulong type); + + [Flags] + public enum LinuxFeatureFlagsHwCap : ulong + { + Fp = 1 << 0, + Asimd = 1 << 1, + Evtstrm = 1 << 2, + Aes = 1 << 3, + Pmull = 1 << 4, + Sha1 = 1 << 5, + Sha2 = 1 << 6, + Crc32 = 1 << 7, + Atomics = 1 << 8, + FpHp = 1 << 9, + AsimdHp = 1 << 10, + CpuId = 1 << 11, + AsimdRdm = 1 << 12, + Jscvt = 1 << 13, + Fcma = 1 << 14, + Lrcpc = 1 << 15, + DcpOp = 1 << 16, + Sha3 = 1 << 17, + Sm3 = 1 << 18, + Sm4 = 1 << 19, + AsimdDp = 1 << 20, + Sha512 = 1 << 21, + Sve = 1 << 22, + AsimdFhm = 1 << 23, + Dit = 1 << 24, + Uscat = 1 << 25, + Ilrcpc = 1 << 26, + FlagM = 1 << 27, + Ssbs = 1 << 28, + Sb = 1 << 29, + Paca = 1 << 30, + Pacg = 1UL << 31 + } + + [Flags] + public enum LinuxFeatureFlagsHwCap2 : ulong + { + Dcpodp = 1 << 0, + Sve2 = 1 << 1, + SveAes = 1 << 2, + SvePmull = 1 << 3, + SveBitperm = 1 << 4, + SveSha3 = 1 << 5, + SveSm4 = 1 << 6, + FlagM2 = 1 << 7, + Frint = 1 << 8, + SveI8mm = 1 << 9, + SveF32mm = 1 << 10, + SveF64mm = 1 << 11, + SveBf16 = 1 << 12, + I8mm = 1 << 13, + Bf16 = 1 << 14, + Dgh = 1 << 15, + Rng = 1 << 16, + Bti = 1 << 17, + Mte = 1 << 18, + Ecv = 1 << 19, + Afp = 1 << 20, + Rpres = 1 << 21, + Mte3 = 1 << 22, + Sme = 1 << 23, + Sme_i16i64 = 1 << 24, + Sme_f64f64 = 1 << 25, + Sme_i8i32 = 1 << 26, + Sme_f16f32 = 1 << 27, + Sme_b16f32 = 1 << 28, + Sme_f32f32 = 1 << 29, + Sme_fa64 = 1 << 30, + Wfxt = 1UL << 31, + Ebf16 = 1UL << 32, + Sve_Ebf16 = 1UL << 33, + Cssc = 1UL << 34, + Rprfm = 1UL << 35, + Sve2p1 = 1UL << 36 + } + + public static LinuxFeatureFlagsHwCap LinuxFeatureInfoHwCap { get; } = 0; + public static LinuxFeatureFlagsHwCap2 LinuxFeatureInfoHwCap2 { get; } = 0; + +#endregion + +#region macOS + + [LibraryImport("libSystem.dylib", SetLastError = true)] + private static unsafe partial int sysctlbyname([MarshalAs(UnmanagedType.LPStr)] string name, out int oldValue, ref ulong oldSize, IntPtr newValue, ulong newValueSize); + + [SupportedOSPlatform("macos")] + private static bool CheckSysctlName(string name) + { + ulong size = sizeof(int); + if (sysctlbyname(name, out int val, ref size, IntPtr.Zero, 0) == 0 && size == sizeof(int)) + { + return val != 0; + } + return false; + } + + private static string[] _sysctlNames = new string[] + { + "hw.optional.floatingpoint", + "hw.optional.AdvSIMD", + "hw.optional.arm.FEAT_FP16", + 
"hw.optional.arm.FEAT_AES", + "hw.optional.arm.FEAT_PMULL", + "hw.optional.arm.FEAT_LSE", + "hw.optional.armv8_crc32", + "hw.optional.arm.FEAT_SHA1", + "hw.optional.arm.FEAT_SHA256" + }; + + [Flags] + public enum MacOsFeatureFlags + { + Fp = 1 << 0, + AdvSimd = 1 << 1, + Fp16 = 1 << 2, + Aes = 1 << 3, + Pmull = 1 << 4, + Lse = 1 << 5, + Crc32 = 1 << 6, + Sha1 = 1 << 7, + Sha256 = 1 << 8 + } + + public static MacOsFeatureFlags MacOsFeatureInfo { get; } = 0; + +#endregion + + public static bool SupportsAdvSimd => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Asimd) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.AdvSimd); + public static bool SupportsAes => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Aes) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Aes); + public static bool SupportsPmull => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Pmull) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Pmull); + public static bool SupportsLse => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Atomics) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Lse); + public static bool SupportsCrc32 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Crc32) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Crc32); + public static bool SupportsSha1 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha1) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha1); + public static bool SupportsSha256 => LinuxFeatureInfoHwCap.HasFlag(LinuxFeatureFlagsHwCap.Sha2) || MacOsFeatureInfo.HasFlag(MacOsFeatureFlags.Sha256); + } +} diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs new file mode 100644 index 00000000..8695db90 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicInfo.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.CodeGen.Arm64 +{ + struct IntrinsicInfo + { + public uint Inst { get; } + public IntrinsicType Type { get; } + + public IntrinsicInfo(uint inst, IntrinsicType type) + { + Inst = inst; + Type = type; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs new file mode 100644 index 00000000..a309d56d --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicTable.cs @@ -0,0 +1,463 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class IntrinsicTable + { + private static IntrinsicInfo[] _intrinTable; + + static IntrinsicTable() + { + _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))]; + + Add(Intrinsic.Arm64AbsS, new IntrinsicInfo(0x5e20b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64AbsV, new IntrinsicInfo(0x0e20b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64AddhnV, new IntrinsicInfo(0x0e204000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64AddpS, new IntrinsicInfo(0x5e31b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64AddpV, new IntrinsicInfo(0x0e20bc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64BicVi, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorBinaryBitwiseImm)); + Add(Intrinsic.Arm64BicV, new IntrinsicInfo(0x0e601c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64BifV, new IntrinsicInfo(0x2ee01c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64BitV, new IntrinsicInfo(0x2ea01c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64BslV, new IntrinsicInfo(0x2e601c00u, IntrinsicType.VectorTernaryRdBitwise)); + Add(Intrinsic.Arm64ClsV, new IntrinsicInfo(0x0e204800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ClzV, new IntrinsicInfo(0x2e204800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmeqS, new IntrinsicInfo(0x7e208c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmeqV, new IntrinsicInfo(0x2e208c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmeqSz, new IntrinsicInfo(0x5e209800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmeqVz, new IntrinsicInfo(0x0e209800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmgeS, new IntrinsicInfo(0x5e203c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmgeV, new IntrinsicInfo(0x0e203c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmgeSz, new IntrinsicInfo(0x7e208800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmgeVz, new IntrinsicInfo(0x2e208800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmgtS, new IntrinsicInfo(0x5e203400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmgtV, new IntrinsicInfo(0x0e203400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmgtSz, new IntrinsicInfo(0x5e208800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmgtVz, new IntrinsicInfo(0x0e208800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmhiS, new IntrinsicInfo(0x7e203400u, 
IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmhiV, new IntrinsicInfo(0x2e203400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmhsS, new IntrinsicInfo(0x7e203c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmhsV, new IntrinsicInfo(0x2e203c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CmleSz, new IntrinsicInfo(0x7e209800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmleVz, new IntrinsicInfo(0x2e209800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmltSz, new IntrinsicInfo(0x5e20a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64CmltVz, new IntrinsicInfo(0x0e20a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64CmtstS, new IntrinsicInfo(0x5e208c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64CmtstV, new IntrinsicInfo(0x0e208c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64CntV, new IntrinsicInfo(0x0e205800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64DupSe, new IntrinsicInfo(0x5e000400u, IntrinsicType.ScalarUnaryByElem)); + Add(Intrinsic.Arm64DupVe, new IntrinsicInfo(0x0e000400u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64DupGp, new IntrinsicInfo(0x0e000c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64EorV, new IntrinsicInfo(0x2e201c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64ExtV, new IntrinsicInfo(0x2e000000u, IntrinsicType.VectorExt)); + Add(Intrinsic.Arm64FabdS, new IntrinsicInfo(0x7ea0d400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FabdV, new IntrinsicInfo(0x2ea0d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FabsV, new IntrinsicInfo(0x0ea0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FabsS, new IntrinsicInfo(0x1e20c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FacgeS, new IntrinsicInfo(0x7e20ec00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FacgeV, new IntrinsicInfo(0x2e20ec00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FacgtS, new IntrinsicInfo(0x7ea0ec00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FacgtV, new IntrinsicInfo(0x2ea0ec00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddpS, new IntrinsicInfo(0x7e30d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FaddpV, new IntrinsicInfo(0x2e20d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddV, new IntrinsicInfo(0x0e20d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FaddS, new IntrinsicInfo(0x1e202800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FccmpeS, new IntrinsicInfo(0x1e200410u, IntrinsicType.ScalarFPCompareCond)); + Add(Intrinsic.Arm64FccmpS, new IntrinsicInfo(0x1e200400u, IntrinsicType.ScalarFPCompareCond)); + Add(Intrinsic.Arm64FcmeqS, new IntrinsicInfo(0x5e20e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmeqV, new IntrinsicInfo(0x0e20e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmeqSz, new IntrinsicInfo(0x5ea0d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmeqVz, new IntrinsicInfo(0x0ea0d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmgeS, new IntrinsicInfo(0x7e20e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmgeV, new IntrinsicInfo(0x2e20e400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FcmgeSz, new IntrinsicInfo(0x7ea0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmgeVz, new IntrinsicInfo(0x2ea0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmgtS, new IntrinsicInfo(0x7ea0e400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FcmgtV, new IntrinsicInfo(0x2ea0e400u, IntrinsicType.VectorBinary)); + 
Add(Intrinsic.Arm64FcmgtSz, new IntrinsicInfo(0x5ea0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmgtVz, new IntrinsicInfo(0x0ea0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmleSz, new IntrinsicInfo(0x7ea0d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmleVz, new IntrinsicInfo(0x2ea0d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmltSz, new IntrinsicInfo(0x5ea0e800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcmltVz, new IntrinsicInfo(0x0ea0e800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcmpeS, new IntrinsicInfo(0x1e202010u, IntrinsicType.ScalarFPCompare)); + Add(Intrinsic.Arm64FcmpS, new IntrinsicInfo(0x1e202000u, IntrinsicType.ScalarFPCompare)); + Add(Intrinsic.Arm64FcselS, new IntrinsicInfo(0x1e200c00u, IntrinsicType.ScalarFcsel)); + Add(Intrinsic.Arm64FcvtasS, new IntrinsicInfo(0x5e21c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtasV, new IntrinsicInfo(0x0e21c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtasGp, new IntrinsicInfo(0x1e240000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtauS, new IntrinsicInfo(0x7e21c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtauV, new IntrinsicInfo(0x2e21c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtauGp, new IntrinsicInfo(0x1e250000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtlV, new IntrinsicInfo(0x0e217800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmsS, new IntrinsicInfo(0x5e21b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtmsV, new IntrinsicInfo(0x0e21b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmsGp, new IntrinsicInfo(0x1e300000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtmuS, new IntrinsicInfo(0x7e21b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtmuV, new IntrinsicInfo(0x2e21b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtmuGp, new IntrinsicInfo(0x1e310000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnsS, new IntrinsicInfo(0x5e21a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtnsV, new IntrinsicInfo(0x0e21a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtnsGp, new IntrinsicInfo(0x1e200000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnuS, new IntrinsicInfo(0x7e21a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtnuV, new IntrinsicInfo(0x2e21a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtnuGp, new IntrinsicInfo(0x1e210000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtnV, new IntrinsicInfo(0x0e216800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64FcvtpsS, new IntrinsicInfo(0x5ea1a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtpsV, new IntrinsicInfo(0x0ea1a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtpsGp, new IntrinsicInfo(0x1e280000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtpuS, new IntrinsicInfo(0x7ea1a800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtpuV, new IntrinsicInfo(0x2ea1a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtpuGp, new IntrinsicInfo(0x1e290000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtxnS, new IntrinsicInfo(0x7e216800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtxnV, new IntrinsicInfo(0x2e216800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzsSFixed, new IntrinsicInfo(0x5f00fc00u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64FcvtzsVFixed, new IntrinsicInfo(0x0f00fc00u, 
IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64FcvtzsS, new IntrinsicInfo(0x5ea1b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtzsV, new IntrinsicInfo(0x0ea1b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzsGpFixed, new IntrinsicInfo(0x1e180000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64FcvtzsGp, new IntrinsicInfo(0x1e380000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtzuSFixed, new IntrinsicInfo(0x7f00fc00u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64FcvtzuVFixed, new IntrinsicInfo(0x2f00fc00u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64FcvtzuS, new IntrinsicInfo(0x7ea1b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FcvtzuV, new IntrinsicInfo(0x2ea1b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FcvtzuGpFixed, new IntrinsicInfo(0x1e190000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64FcvtzuGp, new IntrinsicInfo(0x1e390000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FcvtS, new IntrinsicInfo(0x1e224000u, IntrinsicType.ScalarFPConv)); + Add(Intrinsic.Arm64FdivV, new IntrinsicInfo(0x2e20fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FdivS, new IntrinsicInfo(0x1e201800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmaddS, new IntrinsicInfo(0x1f000000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FmaxnmpS, new IntrinsicInfo(0x7e30c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmaxnmpV, new IntrinsicInfo(0x2e20c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxnmvV, new IntrinsicInfo(0x2e30c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FmaxnmV, new IntrinsicInfo(0x0e20c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxnmS, new IntrinsicInfo(0x1e206800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmaxpS, new IntrinsicInfo(0x7e30f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmaxpV, new IntrinsicInfo(0x2e20f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxvV, new IntrinsicInfo(0x2e30f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FmaxV, new IntrinsicInfo(0x0e20f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmaxS, new IntrinsicInfo(0x1e204800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FminnmpS, new IntrinsicInfo(0x7eb0c800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FminnmpV, new IntrinsicInfo(0x2ea0c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminnmvV, new IntrinsicInfo(0x2eb0c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FminnmV, new IntrinsicInfo(0x0ea0c400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminnmS, new IntrinsicInfo(0x1e207800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FminpS, new IntrinsicInfo(0x7eb0f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FminpV, new IntrinsicInfo(0x2ea0f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminvV, new IntrinsicInfo(0x2eb0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FminV, new IntrinsicInfo(0x0ea0f400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FminS, new IntrinsicInfo(0x1e205800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmlaSe, new IntrinsicInfo(0x5f801000u, IntrinsicType.ScalarTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlaVe, new IntrinsicInfo(0x0f801000u, IntrinsicType.VectorTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlaV, new IntrinsicInfo(0x0e20cc00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64FmlsSe, new IntrinsicInfo(0x5f805000u, IntrinsicType.ScalarTernaryFPRdByElem)); 
+ Add(Intrinsic.Arm64FmlsVe, new IntrinsicInfo(0x0f805000u, IntrinsicType.VectorTernaryFPRdByElem)); + Add(Intrinsic.Arm64FmlsV, new IntrinsicInfo(0x0ea0cc00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64FmovVi, new IntrinsicInfo(0x0f00f400u, IntrinsicType.VectorFmovi)); + Add(Intrinsic.Arm64FmovS, new IntrinsicInfo(0x1e204000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FmovGp, new IntrinsicInfo(0x1e260000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64FmovSi, new IntrinsicInfo(0x1e201000u, IntrinsicType.ScalarFmovi)); + Add(Intrinsic.Arm64FmsubS, new IntrinsicInfo(0x1f008000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FmulxSe, new IntrinsicInfo(0x7f809000u, IntrinsicType.ScalarBinaryFPByElem)); + Add(Intrinsic.Arm64FmulxVe, new IntrinsicInfo(0x2f809000u, IntrinsicType.VectorBinaryFPByElem)); + Add(Intrinsic.Arm64FmulxS, new IntrinsicInfo(0x5e20dc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FmulxV, new IntrinsicInfo(0x0e20dc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmulSe, new IntrinsicInfo(0x5f809000u, IntrinsicType.ScalarBinaryFPByElem)); + Add(Intrinsic.Arm64FmulVe, new IntrinsicInfo(0x0f809000u, IntrinsicType.VectorBinaryFPByElem)); + Add(Intrinsic.Arm64FmulV, new IntrinsicInfo(0x2e20dc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FmulS, new IntrinsicInfo(0x1e200800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FnegV, new IntrinsicInfo(0x2ea0f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FnegS, new IntrinsicInfo(0x1e214000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FnmaddS, new IntrinsicInfo(0x1f200000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FnmsubS, new IntrinsicInfo(0x1f208000u, IntrinsicType.ScalarTernary)); + Add(Intrinsic.Arm64FnmulS, new IntrinsicInfo(0x1e208800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrecpeS, new IntrinsicInfo(0x5ea1d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrecpeV, new IntrinsicInfo(0x0ea1d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrecpsS, new IntrinsicInfo(0x5e20fc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrecpsV, new IntrinsicInfo(0x0e20fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FrecpxS, new IntrinsicInfo(0x5ea1f800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintaV, new IntrinsicInfo(0x2e218800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintaS, new IntrinsicInfo(0x1e264000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintiV, new IntrinsicInfo(0x2ea19800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintiS, new IntrinsicInfo(0x1e27c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintmV, new IntrinsicInfo(0x0e219800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintmS, new IntrinsicInfo(0x1e254000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintnV, new IntrinsicInfo(0x0e218800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintnS, new IntrinsicInfo(0x1e244000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintpV, new IntrinsicInfo(0x0ea18800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintpS, new IntrinsicInfo(0x1e24c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintxV, new IntrinsicInfo(0x2e219800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintxS, new IntrinsicInfo(0x1e274000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrintzV, new IntrinsicInfo(0x0ea19800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrintzS, new IntrinsicInfo(0x1e25c000u, 
IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrsqrteS, new IntrinsicInfo(0x7ea1d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FrsqrteV, new IntrinsicInfo(0x2ea1d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FrsqrtsS, new IntrinsicInfo(0x5ea0fc00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64FrsqrtsV, new IntrinsicInfo(0x0ea0fc00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FsqrtV, new IntrinsicInfo(0x2ea1f800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64FsqrtS, new IntrinsicInfo(0x1e21c000u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64FsubV, new IntrinsicInfo(0x0ea0d400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64FsubS, new IntrinsicInfo(0x1e203800u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64InsVe, new IntrinsicInfo(0x6e000400u, IntrinsicType.VectorInsertByElem)); + Add(Intrinsic.Arm64InsGp, new IntrinsicInfo(0x4e001c00u, IntrinsicType.ScalarUnaryByElem)); + Add(Intrinsic.Arm64Ld1rV, new IntrinsicInfo(0x0d40c000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld1Vms, new IntrinsicInfo(0x0c402000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld1Vss, new IntrinsicInfo(0x0d400000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld2rV, new IntrinsicInfo(0x0d60c000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld2Vms, new IntrinsicInfo(0x0c408000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld2Vss, new IntrinsicInfo(0x0d600000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld3rV, new IntrinsicInfo(0x0d40e000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld3Vms, new IntrinsicInfo(0x0c404000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld3Vss, new IntrinsicInfo(0x0d402000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64Ld4rV, new IntrinsicInfo(0x0d60e000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld4Vms, new IntrinsicInfo(0x0c400000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64Ld4Vss, new IntrinsicInfo(0x0d602000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64MlaVe, new IntrinsicInfo(0x2f000000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64MlaV, new IntrinsicInfo(0x0e209400u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64MlsVe, new IntrinsicInfo(0x2f004000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64MlsV, new IntrinsicInfo(0x2e209400u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64MoviV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorMovi)); + Add(Intrinsic.Arm64MrsFpcr, new IntrinsicInfo(0xd53b4400u, IntrinsicType.GetRegister)); + Add(Intrinsic.Arm64MsrFpcr, new IntrinsicInfo(0xd51b4400u, IntrinsicType.SetRegister)); + Add(Intrinsic.Arm64MrsFpsr, new IntrinsicInfo(0xd53b4420u, IntrinsicType.GetRegister)); + Add(Intrinsic.Arm64MsrFpsr, new IntrinsicInfo(0xd51b4420u, IntrinsicType.SetRegister)); + Add(Intrinsic.Arm64MulVe, new IntrinsicInfo(0x0f008000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64MulV, new IntrinsicInfo(0x0e209c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64MvniV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorMvni)); + Add(Intrinsic.Arm64NegS, new IntrinsicInfo(0x7e20b800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64NegV, new IntrinsicInfo(0x2e20b800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64NotV, new IntrinsicInfo(0x2e205800u, IntrinsicType.VectorUnaryBitwise)); + Add(Intrinsic.Arm64OrnV, new IntrinsicInfo(0x0ee01c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64OrrVi, new IntrinsicInfo(0x0f001400u, 
IntrinsicType.VectorBinaryBitwiseImm)); + Add(Intrinsic.Arm64OrrV, new IntrinsicInfo(0x0ea01c00u, IntrinsicType.VectorBinaryBitwise)); + Add(Intrinsic.Arm64PmullV, new IntrinsicInfo(0x0e20e000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64PmulV, new IntrinsicInfo(0x2e209c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64RaddhnV, new IntrinsicInfo(0x2e204000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64RbitV, new IntrinsicInfo(0x2e605800u, IntrinsicType.VectorUnaryBitwise)); + Add(Intrinsic.Arm64Rev16V, new IntrinsicInfo(0x0e201800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Rev32V, new IntrinsicInfo(0x2e200800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Rev64V, new IntrinsicInfo(0x0e200800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64RshrnV, new IntrinsicInfo(0x0f008c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64RsubhnV, new IntrinsicInfo(0x2e206000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabalV, new IntrinsicInfo(0x0e205000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabaV, new IntrinsicInfo(0x0e207c00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SabdlV, new IntrinsicInfo(0x0e207000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SabdV, new IntrinsicInfo(0x0e207400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SadalpV, new IntrinsicInfo(0x0e206800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SaddlpV, new IntrinsicInfo(0x0e202800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SaddlvV, new IntrinsicInfo(0x0e303800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SaddlV, new IntrinsicInfo(0x0e200000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SaddwV, new IntrinsicInfo(0x0e201000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64ScvtfSFixed, new IntrinsicInfo(0x5f00e400u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64ScvtfVFixed, new IntrinsicInfo(0x0f00e400u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64ScvtfS, new IntrinsicInfo(0x5e21d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64ScvtfV, new IntrinsicInfo(0x0e21d800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ScvtfGpFixed, new IntrinsicInfo(0x1e020000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64ScvtfGp, new IntrinsicInfo(0x1e220000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64Sha1cV, new IntrinsicInfo(0x5e000000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1hV, new IntrinsicInfo(0x5e280800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha1mV, new IntrinsicInfo(0x5e002000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1pV, new IntrinsicInfo(0x5e001000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1su0V, new IntrinsicInfo(0x5e003000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha1su1V, new IntrinsicInfo(0x5e281800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha256h2V, new IntrinsicInfo(0x5e005000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha256hV, new IntrinsicInfo(0x5e004000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64Sha256su0V, new IntrinsicInfo(0x5e282800u, IntrinsicType.Vector128Unary)); + Add(Intrinsic.Arm64Sha256su1V, new IntrinsicInfo(0x5e006000u, IntrinsicType.Vector128Binary)); + Add(Intrinsic.Arm64ShaddV, new IntrinsicInfo(0x0e200400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64ShllV, new IntrinsicInfo(0x2e213800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64ShlS, new IntrinsicInfo(0x5f005400u, 
IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64ShlV, new IntrinsicInfo(0x0f005400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64ShrnV, new IntrinsicInfo(0x0f008400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64ShsubV, new IntrinsicInfo(0x0e202400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SliS, new IntrinsicInfo(0x7f005400u, IntrinsicType.ScalarTernaryShlRd)); + Add(Intrinsic.Arm64SliV, new IntrinsicInfo(0x2f005400u, IntrinsicType.VectorTernaryShlRd)); + Add(Intrinsic.Arm64SmaxpV, new IntrinsicInfo(0x0e20a400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SmaxvV, new IntrinsicInfo(0x0e30a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SmaxV, new IntrinsicInfo(0x0e206400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SminpV, new IntrinsicInfo(0x0e20ac00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SminvV, new IntrinsicInfo(0x0e31a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SminV, new IntrinsicInfo(0x0e206c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SmlalVe, new IntrinsicInfo(0x0f002000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64SmlalV, new IntrinsicInfo(0x0e208000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SmlslVe, new IntrinsicInfo(0x0f006000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64SmlslV, new IntrinsicInfo(0x0e20a000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SmovV, new IntrinsicInfo(0x0e002c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64SmullVe, new IntrinsicInfo(0x0f00a000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SmullV, new IntrinsicInfo(0x0e20c000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqabsS, new IntrinsicInfo(0x5e207800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64SqabsV, new IntrinsicInfo(0x0e207800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SqaddS, new IntrinsicInfo(0x5e200c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqaddV, new IntrinsicInfo(0x0e200c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmlalSe, new IntrinsicInfo(0x5f003000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmlalVe, new IntrinsicInfo(0x0f003000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmlalS, new IntrinsicInfo(0x5e209000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmlalV, new IntrinsicInfo(0x0e209000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmlslSe, new IntrinsicInfo(0x5f007000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmlslVe, new IntrinsicInfo(0x0f007000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmlslS, new IntrinsicInfo(0x5e20b000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmlslV, new IntrinsicInfo(0x0e20b000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmulhSe, new IntrinsicInfo(0x5f00c000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmulhVe, new IntrinsicInfo(0x0f00c000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmulhS, new IntrinsicInfo(0x5e20b400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmulhV, new IntrinsicInfo(0x0e20b400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqdmullSe, new IntrinsicInfo(0x5f00b000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqdmullVe, new IntrinsicInfo(0x0f00b000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqdmullS, new IntrinsicInfo(0x5e20d000u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqdmullV, new 
IntrinsicInfo(0x0e20d000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqnegS, new IntrinsicInfo(0x7e207800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64SqnegV, new IntrinsicInfo(0x2e207800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64SqrdmulhSe, new IntrinsicInfo(0x5f00d000u, IntrinsicType.ScalarBinaryByElem)); + Add(Intrinsic.Arm64SqrdmulhVe, new IntrinsicInfo(0x0f00d000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64SqrdmulhS, new IntrinsicInfo(0x7e20b400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqrdmulhV, new IntrinsicInfo(0x2e20b400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqrshlS, new IntrinsicInfo(0x5e205c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqrshlV, new IntrinsicInfo(0x0e205c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqrshrnS, new IntrinsicInfo(0x5f009c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrnV, new IntrinsicInfo(0x0f009c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrunS, new IntrinsicInfo(0x7f008c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqrshrunV, new IntrinsicInfo(0x2f008c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqshluS, new IntrinsicInfo(0x7f006400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64SqshluV, new IntrinsicInfo(0x2f006400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SqshlSi, new IntrinsicInfo(0x5f007400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64SqshlVi, new IntrinsicInfo(0x0f007400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SqshlS, new IntrinsicInfo(0x5e204c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqshlV, new IntrinsicInfo(0x0e204c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqshrnS, new IntrinsicInfo(0x5f009400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqshrnV, new IntrinsicInfo(0x0f009400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqshrunS, new IntrinsicInfo(0x7f008400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SqshrunV, new IntrinsicInfo(0x2f008400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SqsubS, new IntrinsicInfo(0x5e202c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SqsubV, new IntrinsicInfo(0x0e202c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SqxtnS, new IntrinsicInfo(0x5e214800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SqxtnV, new IntrinsicInfo(0x0e214800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SqxtunS, new IntrinsicInfo(0x7e212800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SqxtunV, new IntrinsicInfo(0x2e212800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64SrhaddV, new IntrinsicInfo(0x0e201400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SriS, new IntrinsicInfo(0x7f004400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SriV, new IntrinsicInfo(0x2f004400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SrshlS, new IntrinsicInfo(0x5e205400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SrshlV, new IntrinsicInfo(0x0e205400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SrshrS, new IntrinsicInfo(0x5f002400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64SrshrV, new IntrinsicInfo(0x0f002400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64SrsraS, new IntrinsicInfo(0x5f003400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SrsraV, new IntrinsicInfo(0x0f003400u, IntrinsicType.VectorTernaryShrRd)); + 
Add(Intrinsic.Arm64SshllV, new IntrinsicInfo(0x0f00a400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64SshlS, new IntrinsicInfo(0x5e204400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SshlV, new IntrinsicInfo(0x0e204400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SshrS, new IntrinsicInfo(0x5f000400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64SshrV, new IntrinsicInfo(0x0f000400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64SsraS, new IntrinsicInfo(0x5f001400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64SsraV, new IntrinsicInfo(0x0f001400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64SsublV, new IntrinsicInfo(0x0e202000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SsubwV, new IntrinsicInfo(0x0e203000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64St1Vms, new IntrinsicInfo(0x0c002000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St1Vss, new IntrinsicInfo(0x0d000000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St2Vms, new IntrinsicInfo(0x0c008000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St2Vss, new IntrinsicInfo(0x0d200000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St3Vms, new IntrinsicInfo(0x0c004000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St3Vss, new IntrinsicInfo(0x0d002000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64St4Vms, new IntrinsicInfo(0x0c000000u, IntrinsicType.VectorLdSt)); + Add(Intrinsic.Arm64St4Vss, new IntrinsicInfo(0x0d202000u, IntrinsicType.VectorLdStSs)); + Add(Intrinsic.Arm64SubhnV, new IntrinsicInfo(0x0e206000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64SubS, new IntrinsicInfo(0x7e208400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64SubV, new IntrinsicInfo(0x2e208400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64SuqaddS, new IntrinsicInfo(0x5e203800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64SuqaddV, new IntrinsicInfo(0x0e203800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64TblV, new IntrinsicInfo(0x0e000000u, IntrinsicType.VectorLookupTable)); + Add(Intrinsic.Arm64TbxV, new IntrinsicInfo(0x0e001000u, IntrinsicType.VectorLookupTable)); + Add(Intrinsic.Arm64Trn1V, new IntrinsicInfo(0x0e002800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Trn2V, new IntrinsicInfo(0x0e006800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UabalV, new IntrinsicInfo(0x2e205000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UabaV, new IntrinsicInfo(0x2e207c00u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UabdlV, new IntrinsicInfo(0x2e207000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UabdV, new IntrinsicInfo(0x2e207400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UadalpV, new IntrinsicInfo(0x2e206800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UaddlpV, new IntrinsicInfo(0x2e202800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UaddlvV, new IntrinsicInfo(0x2e303800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UaddlV, new IntrinsicInfo(0x2e200000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UaddwV, new IntrinsicInfo(0x2e201000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UcvtfSFixed, new IntrinsicInfo(0x7f00e400u, IntrinsicType.ScalarFPConvFixed)); + Add(Intrinsic.Arm64UcvtfVFixed, new IntrinsicInfo(0x2f00e400u, IntrinsicType.VectorFPConvFixed)); + Add(Intrinsic.Arm64UcvtfS, new IntrinsicInfo(0x7e21d800u, IntrinsicType.ScalarUnary)); + Add(Intrinsic.Arm64UcvtfV, new IntrinsicInfo(0x2e21d800u, IntrinsicType.VectorUnary)); + 
Add(Intrinsic.Arm64UcvtfGpFixed, new IntrinsicInfo(0x1e030000u, IntrinsicType.ScalarFPConvFixedGpr)); + Add(Intrinsic.Arm64UcvtfGp, new IntrinsicInfo(0x1e230000u, IntrinsicType.ScalarFPConvGpr)); + Add(Intrinsic.Arm64UhaddV, new IntrinsicInfo(0x2e200400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UhsubV, new IntrinsicInfo(0x2e202400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmaxpV, new IntrinsicInfo(0x2e20a400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmaxvV, new IntrinsicInfo(0x2e30a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UmaxV, new IntrinsicInfo(0x2e206400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UminpV, new IntrinsicInfo(0x2e20ac00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UminvV, new IntrinsicInfo(0x2e31a800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UminV, new IntrinsicInfo(0x2e206c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UmlalVe, new IntrinsicInfo(0x2f002000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64UmlalV, new IntrinsicInfo(0x2e208000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UmlslVe, new IntrinsicInfo(0x2f006000u, IntrinsicType.VectorTernaryRdByElem)); + Add(Intrinsic.Arm64UmlslV, new IntrinsicInfo(0x2e20a000u, IntrinsicType.VectorTernaryRd)); + Add(Intrinsic.Arm64UmovV, new IntrinsicInfo(0x0e003c00u, IntrinsicType.VectorUnaryByElem)); + Add(Intrinsic.Arm64UmullVe, new IntrinsicInfo(0x2f00a000u, IntrinsicType.VectorBinaryByElem)); + Add(Intrinsic.Arm64UmullV, new IntrinsicInfo(0x2e20c000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqaddS, new IntrinsicInfo(0x7e200c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqaddV, new IntrinsicInfo(0x2e200c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqrshlS, new IntrinsicInfo(0x7e205c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqrshlV, new IntrinsicInfo(0x2e205c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqrshrnS, new IntrinsicInfo(0x7f009c00u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UqrshrnV, new IntrinsicInfo(0x2f009c00u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UqshlSi, new IntrinsicInfo(0x7f007400u, IntrinsicType.ScalarBinaryShl)); + Add(Intrinsic.Arm64UqshlVi, new IntrinsicInfo(0x2f007400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64UqshlS, new IntrinsicInfo(0x7e204c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqshlV, new IntrinsicInfo(0x2e204c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqshrnS, new IntrinsicInfo(0x7f009400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UqshrnV, new IntrinsicInfo(0x2f009400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UqsubS, new IntrinsicInfo(0x7e202c00u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UqsubV, new IntrinsicInfo(0x2e202c00u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UqxtnS, new IntrinsicInfo(0x7e214800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64UqxtnV, new IntrinsicInfo(0x2e214800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UrecpeV, new IntrinsicInfo(0x0ea1c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UrhaddV, new IntrinsicInfo(0x2e201400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UrshlS, new IntrinsicInfo(0x7e205400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UrshlV, new IntrinsicInfo(0x2e205400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UrshrS, new IntrinsicInfo(0x7f002400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64UrshrV, new 
IntrinsicInfo(0x2f002400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64UrsqrteV, new IntrinsicInfo(0x2ea1c800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64UrsraS, new IntrinsicInfo(0x7f003400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UrsraV, new IntrinsicInfo(0x2f003400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UshllV, new IntrinsicInfo(0x2f00a400u, IntrinsicType.VectorBinaryShl)); + Add(Intrinsic.Arm64UshlS, new IntrinsicInfo(0x7e204400u, IntrinsicType.ScalarBinary)); + Add(Intrinsic.Arm64UshlV, new IntrinsicInfo(0x2e204400u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UshrS, new IntrinsicInfo(0x7f000400u, IntrinsicType.ScalarBinaryShr)); + Add(Intrinsic.Arm64UshrV, new IntrinsicInfo(0x2f000400u, IntrinsicType.VectorBinaryShr)); + Add(Intrinsic.Arm64UsqaddS, new IntrinsicInfo(0x7e203800u, IntrinsicType.ScalarBinaryRd)); + Add(Intrinsic.Arm64UsqaddV, new IntrinsicInfo(0x2e203800u, IntrinsicType.VectorBinaryRd)); + Add(Intrinsic.Arm64UsraS, new IntrinsicInfo(0x7f001400u, IntrinsicType.ScalarTernaryShrRd)); + Add(Intrinsic.Arm64UsraV, new IntrinsicInfo(0x2f001400u, IntrinsicType.VectorTernaryShrRd)); + Add(Intrinsic.Arm64UsublV, new IntrinsicInfo(0x2e202000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64UsubwV, new IntrinsicInfo(0x2e203000u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Uzp1V, new IntrinsicInfo(0x0e001800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Uzp2V, new IntrinsicInfo(0x0e005800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64XtnV, new IntrinsicInfo(0x0e212800u, IntrinsicType.VectorUnary)); + Add(Intrinsic.Arm64Zip1V, new IntrinsicInfo(0x0e003800u, IntrinsicType.VectorBinary)); + Add(Intrinsic.Arm64Zip2V, new IntrinsicInfo(0x0e007800u, IntrinsicType.VectorBinary)); + } + + private static void Add(Intrinsic intrin, IntrinsicInfo info) + { + _intrinTable[(int)intrin] = info; + } + + public static IntrinsicInfo GetInfo(Intrinsic intrin) + { + return _intrinTable[(int)intrin]; + } + } +}
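GetInfo above is a plain array index, so intrinsic dispatch is O(1): the Intrinsic enum value selects the row, and the row supplies both the base opcode and the operand shape the code generator switches on. A minimal usage sketch, mirroring how the generator earlier in this diff consumes the table:

    IntrinsicInfo info = IntrinsicTable.GetInfo(Intrinsic.Arm64AddV);
    // info.Inst == 0x0e208400u (the base opcode) and
    // info.Type == IntrinsicType.VectorBinary, so the generator ORs in
    // the Q/size fields and the Rd/Rn/Rm registers to finish the encoding.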
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs new file mode 100644 index 00000000..800eca93 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/IntrinsicType.cs @@ -0,0 +1,59 @@ +namespace ARMeilleure.CodeGen.Arm64 +{ + enum IntrinsicType + { + ScalarUnary, + ScalarUnaryByElem, + ScalarBinary, + ScalarBinaryByElem, + ScalarBinaryFPByElem, + ScalarBinaryRd, + ScalarBinaryShl, + ScalarBinaryShr, + ScalarFcsel, + ScalarFmovi, + ScalarFPCompare, + ScalarFPCompareCond, + ScalarFPConv, + ScalarFPConvFixed, + ScalarFPConvFixedGpr, + ScalarFPConvGpr, + ScalarTernary, + ScalarTernaryFPRdByElem, + ScalarTernaryShlRd, + ScalarTernaryShrRd, + + VectorUnary, + VectorUnaryBitwise, + VectorUnaryByElem, + VectorBinary, + VectorBinaryBitwise, + VectorBinaryBitwiseImm, + VectorBinaryByElem, + VectorBinaryFPByElem, + VectorBinaryRd, + VectorBinaryShl, + VectorBinaryShr, + VectorExt, + VectorFmovi, + VectorFPConvFixed, + VectorInsertByElem, + VectorLdSt, + VectorLdStSs, + VectorLookupTable, + VectorMovi, + VectorMvni, + VectorTernaryFPRdByElem, + VectorTernaryRd, + VectorTernaryRdBitwise, + VectorTernaryRdByElem, + VectorTernaryShlRd, + VectorTernaryShrRd, + + Vector128Unary, + Vector128Binary, + + GetRegister, + SetRegister + } +}
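The enum names above carry semantics the pre-allocator depends on: shapes whose names end in Rd describe destructive instructions that read their destination as an extra accumulator input (FMLA, SSRA, SLI, the narrowing shifts, and so on). A hedged helper sketch (not in this diff) making that convention explicit:

    // Hypothetical predicate: "Rd" shapes read and write the destination,
    // so InsertDestructiveRegCopies-style passes must tie it to a source.
    static bool ReadsDestination(IntrinsicType type) => type switch
    {
        IntrinsicType.ScalarBinaryRd or
        IntrinsicType.ScalarTernaryFPRdByElem or
        IntrinsicType.ScalarTernaryShlRd or
        IntrinsicType.ScalarTernaryShrRd or
        IntrinsicType.VectorBinaryRd or
        IntrinsicType.VectorTernaryFPRdByElem or
        IntrinsicType.VectorTernaryRd or
        IntrinsicType.VectorTernaryRdBitwise or
        IntrinsicType.VectorTernaryRdByElem or
        IntrinsicType.VectorTernaryShlRd or
        IntrinsicType.VectorTernaryShrRd => true,
        _ => false,
    };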
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs new file mode 100644 index 00000000..6ea9d239 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Arm64/PreAllocator.cs @@ -0,0 +1,892 @@ +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.Arm64 +{ + static class PreAllocator + { + private class ConstantDict + { + private readonly Dictionary<(ulong, OperandType), Operand> _constants; + + public ConstantDict() + { + _constants = new Dictionary<(ulong, OperandType), Operand>(); + } + + public void Add(ulong value, OperandType type, Operand local) + { + _constants.Add((value, type), local); + } + + public bool TryGetValue(ulong value, OperandType type, out Operand local) + { + return _constants.TryGetValue((value, type), out local); + } + } + + public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs) + { + maxCallArgs = -1; + + Span<Operation> buffer = default; + + Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()]; + + for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext) + { + ConstantDict constants = new ConstantDict(); + + Operation nextNode; + + for (Operation node = block.Operations.First; node != default; node = nextNode) + { + nextNode = node.ListNext; + + if (node.Instruction == Instruction.Phi) + { + continue; + } + + InsertConstantRegCopies(constants, block.Operations, node); + InsertDestructiveRegCopies(block.Operations, node); + + switch (node.Instruction) + { + case Instruction.Call: + // Get the maximum number of arguments used on a call. + // On windows, when a struct is returned from the call, + // we also need to pass the pointer where the struct + // should be written on the first argument. + int argsCount = node.SourcesCount - 1; + + if (node.Destination != default && node.Destination.Type == OperandType.V128) + { + argsCount++; + } + + if (maxCallArgs < argsCount) + { + maxCallArgs = argsCount; + } + + // Copy values to registers expected by the function + // being called, as mandated by the ABI. + InsertCallCopies(constants, block.Operations, node); + break; + case Instruction.CompareAndSwap: + case Instruction.CompareAndSwap16: + case Instruction.CompareAndSwap8: + nextNode = GenerateCompareAndSwap(block.Operations, node); + break; + case Instruction.LoadArgument: + nextNode = InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node); + break; + case Instruction.Return: + InsertReturnCopy(block.Operations, node); + break; + case Instruction.Tailcall: + InsertTailcallCopies(constants, block.Operations, stackAlloc, node, node); + break; + } + } + } + } + + private static void InsertConstantRegCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node) + { + if (node.SourcesCount == 0 || IsIntrinsicWithConst(node)) + { + return; + } + + Instruction inst = node.Instruction; + + Operand src1 = node.GetSource(0); + Operand src2; + + if (src1.Kind == OperandKind.Constant) + { + if (!src1.Type.IsInteger()) + { + // Handle non-integer types (FP32, FP64 and V128). 
+ // For instructions without an immediate operand, we do the following: + // - Insert a copy with the constant value (as integer) to a GPR. + // - Insert a copy from the GPR to a XMM register. + // - Replace the constant use with the XMM register. + src1 = AddFloatConstantCopy(constants, nodes, node, src1); + + node.SetSource(0, src1); + } + else if (!HasConstSrc1(node, src1.Value)) + { + // Handle integer types. + // Most ALU instructions accepts a 32-bits immediate on the second operand. + // We need to ensure the following: + // - If the constant is on operand 1, we need to move it. + // -- But first, we try to swap operand 1 and 2 if the instruction is commutative. + // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy. + // - If the constant is on operand 2, we check if the instruction supports it, + // if not, we also add a copy. 64-bits constants are usually not supported. + if (IsCommutative(node)) + { + src2 = node.GetSource(1); + + Operand temp = src1; + + src1 = src2; + src2 = temp; + + node.SetSource(0, src1); + node.SetSource(1, src2); + } + + if (src1.Kind == OperandKind.Constant) + { + src1 = AddIntConstantCopy(constants, nodes, node, src1); + + node.SetSource(0, src1); + } + } + } + + if (node.SourcesCount < 2) + { + return; + } + + src2 = node.GetSource(1); + + if (src2.Kind == OperandKind.Constant) + { + if (!src2.Type.IsInteger()) + { + src2 = AddFloatConstantCopy(constants, nodes, node, src2); + + node.SetSource(1, src2); + } + else if (!HasConstSrc2(inst, src2)) + { + src2 = AddIntConstantCopy(constants, nodes, node, src2); + + node.SetSource(1, src2); + } + } + + if (node.SourcesCount < 3 || + node.Instruction == Instruction.BranchIf || + node.Instruction == Instruction.Compare || + node.Instruction == Instruction.VectorInsert || + node.Instruction == Instruction.VectorInsert16 || + node.Instruction == Instruction.VectorInsert8) + { + return; + } + + for (int srcIndex = 2; srcIndex < node.SourcesCount; srcIndex++) + { + Operand src = node.GetSource(srcIndex); + + if (src.Kind == OperandKind.Constant) + { + if (!src.Type.IsInteger()) + { + src = AddFloatConstantCopy(constants, nodes, node, src); + + node.SetSource(srcIndex, src); + } + else + { + src = AddIntConstantCopy(constants, nodes, node, src); + + node.SetSource(srcIndex, src); + } + } + } + } + + private static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node) + { + if (node.Destination == default || node.SourcesCount == 0) + { + return; + } + + Operand dest = node.Destination; + Operand src1 = node.GetSource(0); + + if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable) + { + bool useNewLocal = false; + + for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++) + { + if (node.GetSource(srcIndex) == dest) + { + useNewLocal = true; + + break; + } + } + + if (useNewLocal) + { + // Dest is being used as some source already, we need to use a new + // local to store the temporary value, otherwise the value on dest + // local would be overwritten. 
+ Operand temp = Local(dest.Type); + + nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1)); + + node.SetSource(0, temp); + + nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp)); + + node.Destination = temp; + } + else + { + nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1)); + + node.SetSource(0, dest); + } + } + } + + private static void InsertCallCopies(ConstantDict constants, IntrusiveList<Operation> nodes, Operation node) + { + Operation operation = node; + + Operand dest = operation.Destination; + + List<Operand> sources = new List<Operand> + { + operation.GetSource(0) + }; + + int argsCount = operation.SourcesCount - 1; + + int intMax = CallingConvention.GetArgumentsOnRegsCount(); + int vecMax = CallingConvention.GetArgumentsOnRegsCount(); + + int intCount = 0; + int vecCount = 0; + + int stackOffset = 0; + + for (int index = 0; index < argsCount; index++) + { + Operand source = operation.GetSource(index + 1); + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount < intMax; + } + else if (source.Type == OperandType.V128) + { + passOnReg = intCount + 1 < intMax; + } + else + { + passOnReg = vecCount < vecMax; + } + + if (source.Type == OperandType.V128 && passOnReg) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp)); + + sources.Add(argReg); + } + else + { + Operand offset = Const(stackOffset); + + Operation spillOp = Operation(Instruction.SpillArg, default, offset, source); + + InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, spillOp)); + + stackOffset += source.Type.GetSizeInBytes(); + } + } + + if (dest != default) + { + if (dest.Type == OperandType.V128) + { + Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64); + + node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg)); + nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1))); + + operation.Destination = default; + } + else + { + Operand retReg = dest.Type.IsInteger() + ? 
Gpr(CallingConvention.GetIntReturnRegister(), dest.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, dest, retReg); + + nodes.AddAfter(node, copyOp); + + operation.Destination = retReg; + } + } + + operation.SetSources(sources.ToArray()); + } + + private static void InsertTailcallCopies( + ConstantDict constants, + IntrusiveList<Operation> nodes, + StackAllocator stackAlloc, + Operation node, + Operation operation) + { + List<Operand> sources = new List<Operand> + { + operation.GetSource(0) + }; + + int argsCount = operation.SourcesCount - 1; + + int intMax = CallingConvention.GetArgumentsOnRegsCount(); + int vecMax = CallingConvention.GetArgumentsOnRegsCount(); + + int intCount = 0; + int vecCount = 0; + + // Handle arguments passed on registers. + for (int index = 0; index < argsCount; index++) + { + Operand source = operation.GetSource(1 + index); + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount + 1 < intMax; + } + else + { + passOnReg = vecCount < vecMax; + } + + if (source.Type == OperandType.V128 && passOnReg) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + InsertConstantRegCopies(constants, nodes, nodes.AddBefore(node, copyOp)); + + sources.Add(argReg); + } + else + { + throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)"); + } + } + + // The target address must be on the return registers, since we + // don't return anything and it is guaranteed to not be a + // callee saved register (which would be trashed on the epilogue). 
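A hedged illustration of why any other register would fail (x0 and x19 are illustrative AArch64 picks, not necessarily what CodeGenCommon chooses): the epilogue restores callee-saved registers from the stack frame, so only a register that is dead at that point, such as a return register, still holds the target at the final branch. The copy emitted just below parks the address there.

    using System;
    using System.Collections.Generic;

    class Program
    {
        static void Main()
        {
            // Register file right before a tail call's epilogue runs.
            var regs = new Dictionary<string, ulong>
            {
                ["x19"] = 0x12345678, // target parked in a callee-saved register
                ["x0"] = 0x12345678,  // target parked in the return register
            };

            // Epilogue: callee-saved registers are reloaded from the stack.
            var stackFrame = new Dictionary<string, ulong> { ["x19"] = 0xCAFE };
            regs["x19"] = stackFrame["x19"]; // x19 is trashed; x0 is untouched

            Console.WriteLine($"br x19 -> {regs["x19"]:X}"); // CAFE: wrong target
            Console.WriteLine($"br x0  -> {regs["x0"]:X}");  // 12345678: still valid
        }
    }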
+ Operand tcAddress = Gpr(CodeGenCommon.TcAddressRegister, OperandType.I64);
+
+ Operation addrCopyOp = Operation(Instruction.Copy, tcAddress, operation.GetSource(0));
+
+ nodes.AddBefore(node, addrCopyOp);
+
+ sources[0] = tcAddress;
+
+ operation.SetSources(sources.ToArray());
+ }
+
+ private static Operation GenerateCompareAndSwap(IntrusiveList<Operation> nodes, Operation node)
+ {
+ Operand expected = node.GetSource(1);
+
+ if (expected.Type == OperandType.V128)
+ {
+ Operand dest = node.Destination;
+ Operand expectedLow = Local(OperandType.I64);
+ Operand expectedHigh = Local(OperandType.I64);
+ Operand desiredLow = Local(OperandType.I64);
+ Operand desiredHigh = Local(OperandType.I64);
+ Operand actualLow = Local(OperandType.I64);
+ Operand actualHigh = Local(OperandType.I64);
+
+ Operand address = node.GetSource(0);
+ Operand desired = node.GetSource(2);
+
+ void SplitOperand(Operand source, Operand low, Operand high)
+ {
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, low, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, high, source, Const(1)));
+ }
+
+ SplitOperand(expected, expectedLow, expectedHigh);
+ SplitOperand(desired, desiredLow, desiredHigh);
+
+ Operation operation = node;
+
+ // Update the sources and destinations with the split 64-bit halves of the whole 128-bit values.
+ // We also need additional registers that will be used to store temporary information.
+ operation.SetDestinations(new[] { actualLow, actualHigh, Local(OperandType.I64), Local(OperandType.I64) });
+ operation.SetSources(new[] { address, expectedLow, expectedHigh, desiredLow, desiredHigh });
+
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operands.
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+
+ // Assemble the vector with the 64-bit values at the given memory location.
+ node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, actualLow));
+ node = nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, actualHigh, Const(1)));
+ }
+ else
+ {
+ // We need an additional register where the store result will be written.
+ node.SetDestinations(new[] { node.Destination, Local(OperandType.I32) });
+
+ // Add some dummy uses of the input operands, as the CAS operation will be a loop,
+ // so they can't be used as destination operands.
+ Operation operation = node;
+
+ for (int i = 0; i < operation.SourcesCount; i++)
+ {
+ Operand src = operation.GetSource(i);
+ node = nodes.AddAfter(node, Operation(Instruction.Copy, src, src));
+ }
+ }
+
+ return node.ListNext;
+ }
+
+ private static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node)
+ {
+ if (node.SourcesCount == 0)
+ {
+ return;
+ }
+
+ Operand source = node.GetSource(0);
+
+ if (source.Type == OperandType.V128)
+ {
+ Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64);
+ Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64);
+
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0)));
+ nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1)));
+ }
+ else
+ {
+ Operand retReg = source.Type.IsInteger()
+ ?
Gpr(CallingConvention.GetIntReturnRegister(), source.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), source.Type); + + Operation retCopyOp = Operation(Instruction.Copy, retReg, source); + + nodes.AddBefore(node, retCopyOp); + } + } + + private static Operation InsertLoadArgumentCopy( + CompilerContext cctx, + ref Span<Operation> buffer, + IntrusiveList<Operation> nodes, + Operand[] preservedArgs, + Operation node) + { + Operand source = node.GetSource(0); + + Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind."); + + int index = source.AsInt32(); + + int intCount = 0; + int vecCount = 0; + + for (int cIndex = 0; cIndex < index; cIndex++) + { + OperandType argType = cctx.FuncArgTypes[cIndex]; + + if (argType.IsInteger()) + { + intCount++; + } + else if (argType == OperandType.V128) + { + intCount += 2; + } + else + { + vecCount++; + } + } + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount < CallingConvention.GetArgumentsOnRegsCount(); + } + else if (source.Type == OperandType.V128) + { + passOnReg = intCount + 1 < CallingConvention.GetArgumentsOnRegsCount(); + } + else + { + passOnReg = vecCount < CallingConvention.GetArgumentsOnRegsCount(); + } + + if (passOnReg) + { + Operand dest = node.Destination; + + if (preservedArgs[index] == default) + { + if (dest.Type == OperandType.V128) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand pArg = Local(OperandType.V128); + + Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64); + Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64); + + Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg); + Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1)); + + cctx.Cfg.Entry.Operations.AddFirst(copyH); + cctx.Cfg.Entry.Operations.AddFirst(copyL); + + preservedArgs[index] = pArg; + } + else + { + Operand pArg = Local(dest.Type); + + Operand argReg = dest.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, pArg, argReg); + + cctx.Cfg.Entry.Operations.AddFirst(copyOp); + + preservedArgs[index] = pArg; + } + } + + Operation nextNode; + + if (dest.AssignmentsCount == 1) + { + // Let's propagate the argument if we can to avoid copies. + PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]); + nextNode = node.ListNext; + } + else + { + Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]); + nextNode = nodes.AddBefore(node, argCopyOp); + } + + Delete(nodes, node); + return nextNode; + } + else + { + // TODO: Pass on stack. 
+ return node; + } + } + + private static Operand AddFloatConstantCopy( + ConstantDict constants, + IntrusiveList<Operation> nodes, + Operation node, + Operand source) + { + Operand temp = Local(source.Type); + + Operand intConst = AddIntConstantCopy(constants, nodes, node, GetIntConst(source)); + + Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst); + + nodes.AddBefore(node, copyOp); + + return temp; + } + + private static Operand AddIntConstantCopy( + ConstantDict constants, + IntrusiveList<Operation> nodes, + Operation node, + Operand source) + { + if (constants.TryGetValue(source.Value, source.Type, out Operand temp)) + { + return temp; + } + + temp = Local(source.Type); + + Operation copyOp = Operation(Instruction.Copy, temp, source); + + nodes.AddBefore(node, copyOp); + + constants.Add(source.Value, source.Type, temp); + + return temp; + } + + private static Operand GetIntConst(Operand value) + { + if (value.Type == OperandType.FP32) + { + return Const(value.AsInt32()); + } + else if (value.Type == OperandType.FP64) + { + return Const(value.AsInt64()); + } + + return value; + } + + private static void Delete(IntrusiveList<Operation> nodes, Operation node) + { + node.Destination = default; + + for (int index = 0; index < node.SourcesCount; index++) + { + node.SetSource(index, default); + } + + nodes.Remove(node); + } + + private static Operand Gpr(int register, OperandType type) + { + return Register(register, RegisterType.Integer, type); + } + + private static Operand Xmm(int register, OperandType type) + { + return Register(register, RegisterType.Vector, type); + } + + private static bool IsSameOperandDestSrc1(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Extended: + return IsSameOperandDestSrc1(operation.Intrinsic); + case Instruction.VectorInsert: + case Instruction.VectorInsert16: + case Instruction.VectorInsert8: + return true; + } + + return false; + } + + private static bool IsSameOperandDestSrc1(Intrinsic intrinsic) + { + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + return info.Type == IntrinsicType.ScalarBinaryRd || + info.Type == IntrinsicType.ScalarTernaryFPRdByElem || + info.Type == IntrinsicType.ScalarTernaryShlRd || + info.Type == IntrinsicType.ScalarTernaryShrRd || + info.Type == IntrinsicType.VectorBinaryRd || + info.Type == IntrinsicType.VectorInsertByElem || + info.Type == IntrinsicType.VectorTernaryRd || + info.Type == IntrinsicType.VectorTernaryRdBitwise || + info.Type == IntrinsicType.VectorTernaryFPRdByElem || + info.Type == IntrinsicType.VectorTernaryRdByElem || + info.Type == IntrinsicType.VectorTernaryShlRd || + info.Type == IntrinsicType.VectorTernaryShrRd; + } + + private static bool HasConstSrc1(Operation node, ulong value) + { + switch (node.Instruction) + { + case Instruction.Add: + case Instruction.BranchIf: + case Instruction.Compare: + case Instruction.Subtract: + // The immediate encoding of those instructions does not allow Rn to be + // XZR (it will be SP instead), so we can't allow a Rn constant in this case. 
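Restated standalone (toy kinds, not the real Operand type): a constant first operand is only representable when it is zero, because only the register encoding can name XZR for Rn, and that in turn requires that the second operand not be a non-zero constant, since a non-zero src2 forces the immediate encoding where the Rn slot means SP. The return below implements exactly this predicate.

    using System;

    class Program
    {
        enum Kind { Constant, Register }

        // Mirror of the HasConstSrc1 rule for Add/Subtract/Compare/BranchIf.
        static bool ConstSrc1Allowed(ulong src1Value, Kind src2Kind, ulong src2Value)
        {
            return src1Value == 0 && (src2Kind != Kind.Constant || src2Value == 0);
        }

        static void Main()
        {
            Console.WriteLine(ConstSrc1Allowed(0, Kind.Register, 0)); // True: ADD xd, xzr, xm
            Console.WriteLine(ConstSrc1Allowed(0, Kind.Constant, 5)); // False: immediate form reads Rn as SP
            Console.WriteLine(ConstSrc1Allowed(7, Kind.Register, 0)); // False: 7 must live in a register
        }
    }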
+ return value == 0 && NotConstOrConst0(node.GetSource(1)); + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseNot: + case Instruction.BitwiseOr: + case Instruction.ByteSwap: + case Instruction.CountLeadingZeros: + case Instruction.Multiply: + case Instruction.Negate: + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + return value == 0; + case Instruction.Copy: + case Instruction.LoadArgument: + case Instruction.Spill: + case Instruction.SpillArg: + return true; + case Instruction.Extended: + return value == 0; + } + + return false; + } + + private static bool NotConstOrConst0(Operand operand) + { + return operand.Kind != OperandKind.Constant || operand.Value == 0; + } + + private static bool HasConstSrc2(Instruction inst, Operand operand) + { + ulong value = operand.Value; + + switch (inst) + { + case Instruction.Add: + case Instruction.BranchIf: + case Instruction.Compare: + case Instruction.Subtract: + return ConstFitsOnUImm12Sh(value); + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + return value == 0 || CodeGenCommon.TryEncodeBitMask(operand, out _, out _, out _); + case Instruction.Multiply: + case Instruction.Store: + case Instruction.Store16: + case Instruction.Store8: + return value == 0; + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + case Instruction.VectorExtract: + case Instruction.VectorExtract16: + case Instruction.VectorExtract8: + return true; + case Instruction.Extended: + // TODO: Check if actual intrinsic is supposed to have consts here? + // Right now we only hit this case for fixed-point int <-> FP conversion instructions. + return true; + } + + return false; + } + + private static bool IsCommutative(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Add: + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + case Instruction.Multiply: + return true; + + case Instruction.BranchIf: + case Instruction.Compare: + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var compType = (Comparison)comp.AsInt32(); + + return compType == Comparison.Equal || compType == Comparison.NotEqual; + } + } + + return false; + } + + private static bool ConstFitsOnUImm12Sh(ulong value) + { + return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0; + } + + private static bool IsIntrinsicWithConst(Operation operation) + { + bool isIntrinsic = IsIntrinsic(operation.Instruction); + + if (isIntrinsic) + { + Intrinsic intrinsic = operation.Intrinsic; + IntrinsicInfo info = IntrinsicTable.GetInfo(intrinsic & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask)); + + // Those have integer inputs that don't support consts. 
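One encoding detail from this file worth pinning down: ConstFitsOnUImm12Sh above captures the AArch64 arithmetic-immediate rule, a 12-bit unsigned value optionally shifted left by 12 bits. A quick standalone check of the same predicate, with sample values chosen for illustration; the intrinsic filter resumes below.

    using System;

    class Program
    {
        // Same predicate as ConstFitsOnUImm12Sh: imm12, optionally LSL #12.
        static bool FitsOnUImm12Sh(ulong value)
        {
            return (value & ~0xfffUL) == 0 || (value & ~0xfff000UL) == 0;
        }

        static void Main()
        {
            Console.WriteLine(FitsOnUImm12Sh(0xfff));     // True: plain imm12
            Console.WriteLine(FitsOnUImm12Sh(0x123000));  // True: imm12 shifted by 12
            Console.WriteLine(FitsOnUImm12Sh(0x1001));    // False: straddles both halves
            Console.WriteLine(FitsOnUImm12Sh(0x1000000)); // False: out of range
        }
    }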
+ return info.Type != IntrinsicType.ScalarFPConvGpr && + info.Type != IntrinsicType.ScalarFPConvFixedGpr && + info.Type != IntrinsicType.SetRegister; + } + + return false; + } + + private static bool IsIntrinsic(Instruction inst) + { + return inst == Instruction.Extended; + } + } +} diff --git a/src/ARMeilleure/CodeGen/CompiledFunction.cs b/src/ARMeilleure/CodeGen/CompiledFunction.cs new file mode 100644 index 00000000..0560bf2e --- /dev/null +++ b/src/ARMeilleure/CodeGen/CompiledFunction.cs @@ -0,0 +1,68 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Translation.Cache; +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.CodeGen +{ + /// <summary> + /// Represents a compiled function. + /// </summary> + readonly struct CompiledFunction + { + /// <summary> + /// Gets the machine code of the <see cref="CompiledFunction"/>. + /// </summary> + public byte[] Code { get; } + + /// <summary> + /// Gets the <see cref="Unwinding.UnwindInfo"/> of the <see cref="CompiledFunction"/>. + /// </summary> + public UnwindInfo UnwindInfo { get; } + + /// <summary> + /// Gets the <see cref="Linking.RelocInfo"/> of the <see cref="CompiledFunction"/>. + /// </summary> + public RelocInfo RelocInfo { get; } + + /// <summary> + /// Initializes a new instance of the <see cref="CompiledFunction"/> struct with the specified machine code, + /// unwind info and relocation info. + /// </summary> + /// <param name="code">Machine code</param> + /// <param name="unwindInfo">Unwind info</param> + /// <param name="relocInfo">Relocation info</param> + internal CompiledFunction(byte[] code, UnwindInfo unwindInfo, RelocInfo relocInfo) + { + Code = code; + UnwindInfo = unwindInfo; + RelocInfo = relocInfo; + } + + /// <summary> + /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type + /// <typeparamref name="T"/> pointing to the mapped function. + /// </summary> + /// <typeparam name="T">Type of delegate</typeparam> + /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns> + public T Map<T>() + { + return MapWithPointer<T>(out _); + } + + /// <summary> + /// Maps the <see cref="CompiledFunction"/> onto the <see cref="JitCache"/> and returns a delegate of type + /// <typeparamref name="T"/> pointing to the mapped function. + /// </summary> + /// <typeparam name="T">Type of delegate</typeparam> + /// <param name="codePointer">Pointer to the function code in memory</param> + /// <returns>A delegate of type <typeparamref name="T"/> pointing to the mapped function</returns> + public T MapWithPointer<T>(out IntPtr codePointer) + { + codePointer = JitCache.Map(this); + + return Marshal.GetDelegateForFunctionPointer<T>(codePointer); + } + } +}
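MapWithPointer<T> bottoms out in Marshal.GetDelegateForFunctionPointer. A minimal runnable sketch of that last step, with an invented delegate type standing in for T and a managed method standing in for the JIT-cache code pointer (the real pointer comes from JitCache.Map):

    using System;
    using System.Runtime.InteropServices;

    class Program
    {
        // The delegate shape a caller of Map<T> would supply (illustrative).
        [UnmanagedFunctionPointer(CallingConvention.Cdecl)]
        delegate int BinaryOp(int x, int y);

        static int Add(int x, int y) => x + y;

        static void Main()
        {
            // Round-trip: function pointer -> typed delegate, as MapWithPointer does.
            BinaryOp managed = Add;
            IntPtr codePointer = Marshal.GetFunctionPointerForDelegate(managed);

            var callable = Marshal.GetDelegateForFunctionPointer<BinaryOp>(codePointer);
            Console.WriteLine(callable(2, 3)); // 5

            GC.KeepAlive(managed); // keep the thunk alive while the pointer is in use
        }
    }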
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs new file mode 100644 index 00000000..a27bfded --- /dev/null +++ b/src/ARMeilleure/CodeGen/Linking/RelocEntry.cs @@ -0,0 +1,38 @@ +namespace ARMeilleure.CodeGen.Linking +{ + /// <summary> + /// Represents a relocation. + /// </summary> + readonly struct RelocEntry + { + public const int Stride = 13; // Bytes. + + /// <summary> + /// Gets the position of the relocation. + /// </summary> + public int Position { get; } + + /// <summary> + /// Gets the <see cref="Symbol"/> of the relocation. + /// </summary> + public Symbol Symbol { get; } + + /// <summary> + /// Initializes a new instance of the <see cref="RelocEntry"/> struct with the specified position and + /// <see cref="Symbol"/>. + /// </summary> + /// <param name="position">Position of relocation</param> + /// <param name="symbol">Symbol of relocation</param> + public RelocEntry(int position, Symbol symbol) + { + Position = position; + Symbol = symbol; + } + + /// <inheritdoc/> + public override string ToString() + { + return $"({nameof(Position)} = {Position}, {nameof(Symbol)} = {Symbol})"; + } + } +}
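Stride = 13 is consistent with the natural serialized layout of an entry: a 4-byte position plus a symbol flattened to a 1-byte type and an 8-byte value. The writer itself lives elsewhere (in the PTC code), so treat this as a plausibility check under that assumption:

    using System;
    using System.IO;

    class Program
    {
        static void Main()
        {
            // Assumed layout: int position, byte symbol type, ulong symbol value.
            using var ms = new MemoryStream();
            using var writer = new BinaryWriter(ms);

            writer.Write(0x100);        // Position      (4 bytes)
            writer.Write((byte)1);      // SymbolType    (1 byte)
            writer.Write(0xDEADBEEFUL); // Symbol value  (8 bytes)

            Console.WriteLine(ms.Length); // 13 == RelocEntry.Stride
        }
    }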
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs new file mode 100644 index 00000000..caaf08e3 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Linking/RelocInfo.cs @@ -0,0 +1,32 @@ +using System; + +namespace ARMeilleure.CodeGen.Linking +{ + /// <summary> + /// Represents relocation information about a <see cref="CompiledFunction"/>. + /// </summary> + readonly struct RelocInfo + { + /// <summary> + /// Gets an empty <see cref="RelocInfo"/>. + /// </summary> + public static RelocInfo Empty { get; } = new RelocInfo(null); + + private readonly RelocEntry[] _entries; + + /// <summary> + /// Gets the set of <see cref="RelocEntry"/>. + /// </summary> + public ReadOnlySpan<RelocEntry> Entries => _entries; + + /// <summary> + /// Initializes a new instance of the <see cref="RelocInfo"/> struct with the specified set of + /// <see cref="RelocEntry"/>. + /// </summary> + /// <param name="entries">Set of <see cref="RelocInfo"/> to use</param> + public RelocInfo(RelocEntry[] entries) + { + _entries = entries; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Linking/Symbol.cs b/src/ARMeilleure/CodeGen/Linking/Symbol.cs new file mode 100644 index 00000000..39e0c3eb --- /dev/null +++ b/src/ARMeilleure/CodeGen/Linking/Symbol.cs @@ -0,0 +1,99 @@ +using System; + +namespace ARMeilleure.CodeGen.Linking +{ + /// <summary> + /// Represents a symbol. + /// </summary> + readonly struct Symbol + { + private readonly ulong _value; + + /// <summary> + /// Gets the <see cref="SymbolType"/> of the <see cref="Symbol"/>. + /// </summary> + public SymbolType Type { get; } + + /// <summary> + /// Gets the value of the <see cref="Symbol"/>. + /// </summary> + /// <exception cref="InvalidOperationException"><see cref="Type"/> is <see cref="SymbolType.None"/></exception> + public ulong Value + { + get + { + if (Type == SymbolType.None) + { + ThrowSymbolNone(); + } + + return _value; + } + } + + /// <summary> + /// Initializes a new instance of the <see cref="Symbol"/> structure with the specified <see cref="SymbolType"/> and value. + /// </summary> + /// <param name="type">Type of symbol</param> + /// <param name="value">Value of symbol</param> + public Symbol(SymbolType type, ulong value) + { + (Type, _value) = (type, value); + } + + /// <summary> + /// Determines if the specified <see cref="Symbol"/> instances are equal. + /// </summary> + /// <param name="a">First instance</param> + /// <param name="b">Second instance</param> + /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns> + public static bool operator ==(Symbol a, Symbol b) + { + return a.Equals(b); + } + + /// <summary> + /// Determines if the specified <see cref="Symbol"/> instances are not equal. + /// </summary> + /// <param name="a">First instance</param> + /// <param name="b">Second instance</param> + /// <returns><see langword="true"/> if not equal; otherwise <see langword="false"/></returns> + public static bool operator !=(Symbol a, Symbol b) + { + return !(a == b); + } + + /// <summary> + /// Determines if the specified <see cref="Symbol"/> is equal to this <see cref="Symbol"/> instance. + /// </summary> + /// <param name="other">Other <see cref="Symbol"/> instance</param> + /// <returns><see langword="true"/> if equal; otherwise <see langword="false"/></returns> + public bool Equals(Symbol other) + { + return other.Type == Type && other._value == _value; + } + + /// <inheritdoc/> + public override bool Equals(object obj) + { + return obj is Symbol sym && Equals(sym); + } + + /// <inheritdoc/> + public override int GetHashCode() + { + return HashCode.Combine(Type, _value); + } + + /// <inheritdoc/> + public override string ToString() + { + return $"{Type}:{_value}"; + } + + private static void ThrowSymbolNone() + { + throw new InvalidOperationException("Symbol refers to nothing."); + } + } +} diff --git a/src/ARMeilleure/CodeGen/Linking/SymbolType.cs b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs new file mode 100644 index 00000000..b05b6969 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Linking/SymbolType.cs @@ -0,0 +1,28 @@ +namespace ARMeilleure.CodeGen.Linking +{ + /// <summary> + /// Types of <see cref="Symbol"/>. + /// </summary> + enum SymbolType : byte + { + /// <summary> + /// Refers to nothing, i.e no symbol. + /// </summary> + None, + + /// <summary> + /// Refers to an entry in <see cref="Translation.Delegates"/>. + /// </summary> + DelegateTable, + + /// <summary> + /// Refers to an entry in <see cref="Translation.Translator.FunctionTable"/>. 
+ /// </summary> + FunctionTable, + + /// <summary> + /// Refers to a special symbol which is handled by <see cref="Translation.PTC.Ptc.PatchCode"/>. + /// </summary> + Special + } +} diff --git a/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs new file mode 100644 index 00000000..9e243d37 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Optimizations/BlockPlacement.cs @@ -0,0 +1,72 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Diagnostics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class BlockPlacement + { + public static void RunPass(ControlFlowGraph cfg) + { + bool update = false; + + BasicBlock block; + BasicBlock nextBlock; + + BasicBlock lastBlock = cfg.Blocks.Last; + + // Move cold blocks at the end of the list, so that they are emitted away from hot code. + for (block = cfg.Blocks.First; block != null; block = nextBlock) + { + nextBlock = block.ListNext; + + if (block.Frequency == BasicBlockFrequency.Cold) + { + cfg.Blocks.Remove(block); + cfg.Blocks.AddLast(block); + } + + if (block == lastBlock) + { + break; + } + } + + for (block = cfg.Blocks.First; block != null; block = nextBlock) + { + nextBlock = block.ListNext; + + if (block.SuccessorsCount == 2) + { + Operation branchOp = block.Operations.Last; + + Debug.Assert(branchOp.Instruction == Instruction.BranchIf); + + BasicBlock falseSucc = block.GetSuccessor(0); + BasicBlock trueSucc = block.GetSuccessor(1); + + // If true successor is next block in list, invert the condition. We avoid extra branching by + // making the true side the fallthrough (i.e, convert it to the false side). + if (trueSucc == block.ListNext) + { + Comparison comp = (Comparison)branchOp.GetSource(2).AsInt32(); + Comparison compInv = comp.Invert(); + + branchOp.SetSource(2, Const((int)compInv)); + + block.SetSuccessor(0, trueSucc); + block.SetSuccessor(1, falseSucc); + + update = true; + } + } + } + + if (update) + { + cfg.Update(); + } + } + } +} diff --git a/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs new file mode 100644 index 00000000..c5a22a53 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Optimizations/ConstantFolding.cs @@ -0,0 +1,346 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class ConstantFolding + { + public static void RunPass(Operation operation) + { + if (operation.Destination == default || operation.SourcesCount == 0) + { + return; + } + + if (!AreAllSourcesConstant(operation)) + { + return; + } + + OperandType type = operation.Destination.Type; + + switch (operation.Instruction) + { + case Instruction.Add: + if (operation.GetSource(0).Relocatable || + operation.GetSource(1).Relocatable) + { + break; + } + + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x + y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x + y); + } + break; + + case Instruction.BitwiseAnd: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x & y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x & y); + } + break; + + case Instruction.BitwiseExclusiveOr: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x ^ y); 
+ } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x ^ y); + } + break; + + case Instruction.BitwiseNot: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => ~x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => ~x); + } + break; + + case Instruction.BitwiseOr: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x | y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x | y); + } + break; + + case Instruction.ConvertI64ToI32: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + break; + + case Instruction.Compare: + if (type == OperandType.I32 && + operation.GetSource(0).Type == type && + operation.GetSource(1).Type == type) + { + switch ((Comparison)operation.GetSource(2).Value) + { + case Comparison.Equal: + EvaluateBinaryI32(operation, (x, y) => x == y ? 1 : 0); + break; + case Comparison.NotEqual: + EvaluateBinaryI32(operation, (x, y) => x != y ? 1 : 0); + break; + case Comparison.Greater: + EvaluateBinaryI32(operation, (x, y) => x > y ? 1 : 0); + break; + case Comparison.LessOrEqual: + EvaluateBinaryI32(operation, (x, y) => x <= y ? 1 : 0); + break; + case Comparison.GreaterUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x > (uint)y ? 1 : 0); + break; + case Comparison.LessOrEqualUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x <= (uint)y ? 1 : 0); + break; + case Comparison.GreaterOrEqual: + EvaluateBinaryI32(operation, (x, y) => x >= y ? 1 : 0); + break; + case Comparison.Less: + EvaluateBinaryI32(operation, (x, y) => x < y ? 1 : 0); + break; + case Comparison.GreaterOrEqualUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x >= (uint)y ? 1 : 0); + break; + case Comparison.LessUI: + EvaluateBinaryI32(operation, (x, y) => (uint)x < (uint)y ? 1 : 0); + break; + } + } + break; + + case Instruction.Copy: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => x); + } + break; + + case Instruction.Divide: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => y != 0 ? x / y : 0); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => y != 0 ? x / y : 0); + } + break; + + case Instruction.DivideUI: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => y != 0 ? (int)((uint)x / (uint)y) : 0); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => y != 0 ? 
(long)((ulong)x / (ulong)y) : 0); + } + break; + + case Instruction.Multiply: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x * y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x * y); + } + break; + + case Instruction.Negate: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => -x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => -x); + } + break; + + case Instruction.ShiftLeft: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x << y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x << (int)y); + } + break; + + case Instruction.ShiftRightSI: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x >> y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x >> (int)y); + } + break; + + case Instruction.ShiftRightUI: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => (int)((uint)x >> y)); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => (long)((ulong)x >> (int)y)); + } + break; + + case Instruction.SignExtend16: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (short)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (short)x); + } + break; + + case Instruction.SignExtend32: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (int)x); + } + break; + + case Instruction.SignExtend8: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (sbyte)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (sbyte)x); + } + break; + + case Instruction.ZeroExtend16: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (ushort)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (ushort)x); + } + break; + + case Instruction.ZeroExtend32: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (uint)x); + } + break; + + case Instruction.ZeroExtend8: + if (type == OperandType.I32) + { + EvaluateUnaryI32(operation, (x) => (byte)x); + } + else if (type == OperandType.I64) + { + EvaluateUnaryI64(operation, (x) => (byte)x); + } + break; + + case Instruction.Subtract: + if (type == OperandType.I32) + { + EvaluateBinaryI32(operation, (x, y) => x - y); + } + else if (type == OperandType.I64) + { + EvaluateBinaryI64(operation, (x, y) => x - y); + } + break; + } + } + + private static bool AreAllSourcesConstant(Operation operation) + { + for (int index = 0; index < operation.SourcesCount; index++) + { + Operand srcOp = operation.GetSource(index); + + if (srcOp.Kind != OperandKind.Constant) + { + return false; + } + } + + return true; + } + + private static void EvaluateUnaryI32(Operation operation, Func<int, int> op) + { + int x = operation.GetSource(0).AsInt32(); + + operation.TurnIntoCopy(Const(op(x))); + } + + private static void EvaluateUnaryI64(Operation operation, Func<long, long> op) + { + long x = operation.GetSource(0).AsInt64(); + + operation.TurnIntoCopy(Const(op(x))); + } + + private static void EvaluateBinaryI32(Operation operation, Func<int, int, int> op) + { + int x = operation.GetSource(0).AsInt32(); + int y = operation.GetSource(1).AsInt32(); + + 
operation.TurnIntoCopy(Const(op(x, y))); + } + + private static void EvaluateBinaryI64(Operation operation, Func<long, long, long> op) + { + long x = operation.GetSource(0).AsInt64(); + long y = operation.GetSource(1).AsInt64(); + + operation.TurnIntoCopy(Const(op(x, y))); + } + } +}
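Every case above follows one shape: when all sources are constants, evaluate the operation on their values and rewrite the node in place as a copy of the folded constant. The same shape on a toy node type (not the ARMeilleure IR):

    using System;

    class Node
    {
        public string Op;
        public long[] Sources;

        public void TurnIntoCopy(long value)
        {
            // The node keeps its place in the block; only opcode and operands change.
            Op = "Copy";
            Sources = new[] { value };
        }
    }

    class Program
    {
        static void Fold(Node n)
        {
            switch (n.Op)
            {
                case "Add": n.TurnIntoCopy(n.Sources[0] + n.Sources[1]); break;
                case "ShiftLeft": n.TurnIntoCopy(n.Sources[0] << (int)n.Sources[1]); break;
            }
        }

        static void Main()
        {
            var n = new Node { Op = "Add", Sources = new long[] { 2, 3 } };
            Fold(n);
            Console.WriteLine($"{n.Op}({n.Sources[0]})"); // Copy(5)
        }
    }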
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs new file mode 100644 index 00000000..a45bb455 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Optimizations/Optimizer.cs @@ -0,0 +1,252 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class Optimizer + { + public static void RunPass(ControlFlowGraph cfg) + { + // Scratch buffer used to store uses. + Span<Operation> buffer = default; + + bool modified; + + do + { + modified = false; + + for (BasicBlock block = cfg.Blocks.Last; block != null; block = block.ListPrevious) + { + Operation node; + Operation prevNode; + + for (node = block.Operations.Last; node != default; node = prevNode) + { + prevNode = node.ListPrevious; + + if (IsUnused(node)) + { + RemoveNode(block, node); + + modified = true; + + continue; + } + else if (node.Instruction == Instruction.Phi) + { + continue; + } + + ConstantFolding.RunPass(node); + Simplification.RunPass(node); + + if (DestIsSingleLocalVar(node)) + { + if (IsPropagableCompare(node)) + { + modified |= PropagateCompare(ref buffer, node); + + if (modified && IsUnused(node)) + { + RemoveNode(block, node); + } + } + else if (IsPropagableCopy(node)) + { + PropagateCopy(ref buffer, node); + + RemoveNode(block, node); + + modified = true; + } + } + } + } + } + while (modified); + } + + public static void RemoveUnusedNodes(ControlFlowGraph cfg) + { + bool modified; + + do + { + modified = false; + + for (BasicBlock block = cfg.Blocks.Last; block != null; block = block.ListPrevious) + { + Operation node; + Operation prevNode; + + for (node = block.Operations.Last; node != default; node = prevNode) + { + prevNode = node.ListPrevious; + + if (IsUnused(node)) + { + RemoveNode(block, node); + + modified = true; + } + } + } + } + while (modified); + } + + private static bool PropagateCompare(ref Span<Operation> buffer, Operation compOp) + { + // Try to propagate Compare operations into their BranchIf uses, when these BranchIf uses are in the form + // of: + // + // - BranchIf %x, 0x0, Equal ;; i.e BranchIfFalse %x + // - BranchIf %x, 0x0, NotEqual ;; i.e BranchIfTrue %x + // + // The commutative property of Equal and NotEqual is taken into consideration as well. + // + // For example: + // + // %x = Compare %a, %b, comp + // BranchIf %x, 0x0, NotEqual + // + // => + // + // BranchIf %a, %b, comp + + static bool IsZeroBranch(Operation operation, out Comparison compType) + { + compType = Comparison.Equal; + + if (operation.Instruction != Instruction.BranchIf) + { + return false; + } + + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand comp = operation.GetSource(2); + + compType = (Comparison)comp.AsInt32(); + + return (src1.Kind == OperandKind.Constant && src1.Value == 0) || + (src2.Kind == OperandKind.Constant && src2.Value == 0); + } + + bool modified = false; + + Operand dest = compOp.Destination; + Operand src1 = compOp.GetSource(0); + Operand src2 = compOp.GetSource(1); + Operand comp = compOp.GetSource(2); + + Comparison compType = (Comparison)comp.AsInt32(); + + Span<Operation> uses = dest.GetUses(ref buffer); + + foreach (Operation use in uses) + { + // If operation is a BranchIf and has a constant value 0 in its RHS or LHS source operands. 
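The identity that makes the rewrite sound, checked directly in plain C# (delegates stand in for the Comparison enum): branching on x != 0 where x = Compare(a, b, comp) is the same as branching on comp(a, b), and the x == 0 form is the inverted comparison. The IsZeroBranch test is applied just below to pick out exactly those uses.

    using System;

    class Program
    {
        static void Main()
        {
            int a = 3, b = 7;

            Func<int, int, bool> less = (x, y) => x < y;
            Func<int, int, bool> greaterOrEqual = (x, y) => x >= y; // less, inverted

            // %x = Compare %a, %b, less (materialized as 0 or 1)
            int xVal = less(a, b) ? 1 : 0;

            // BranchIf %x, 0, NotEqual == BranchIf %a, %b, less
            Console.WriteLine((xVal != 0) == less(a, b)); // True

            // BranchIf %x, 0, Equal == BranchIf %a, %b, less.Invert()
            Console.WriteLine((xVal == 0) == greaterOrEqual(a, b)); // True
        }
    }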
+ if (IsZeroBranch(use, out Comparison otherCompType)) + { + Comparison propCompType; + + if (otherCompType == Comparison.NotEqual) + { + propCompType = compType; + } + else if (otherCompType == Comparison.Equal) + { + propCompType = compType.Invert(); + } + else + { + continue; + } + + use.SetSource(0, src1); + use.SetSource(1, src2); + use.SetSource(2, Const((int)propCompType)); + + modified = true; + } + } + + return modified; + } + + private static void PropagateCopy(ref Span<Operation> buffer, Operation copyOp) + { + // Propagate copy source operand to all uses of the destination operand. + Operand dest = copyOp.Destination; + Operand source = copyOp.GetSource(0); + + Span<Operation> uses = dest.GetUses(ref buffer); + + foreach (Operation use in uses) + { + for (int index = 0; index < use.SourcesCount; index++) + { + if (use.GetSource(index) == dest) + { + use.SetSource(index, source); + } + } + } + } + + private static void RemoveNode(BasicBlock block, Operation node) + { + // Remove a node from the nodes list, and also remove itself + // from all the use lists on the operands that this node uses. + block.Operations.Remove(node); + + for (int index = 0; index < node.SourcesCount; index++) + { + node.SetSource(index, default); + } + + Debug.Assert(node.Destination == default || node.Destination.UsesCount == 0); + + node.Destination = default; + } + + private static bool IsUnused(Operation node) + { + return DestIsSingleLocalVar(node) && node.Destination.UsesCount == 0 && !HasSideEffects(node); + } + + private static bool DestIsSingleLocalVar(Operation node) + { + return node.DestinationsCount == 1 && node.Destination.Kind == OperandKind.LocalVariable; + } + + private static bool HasSideEffects(Operation node) + { + return node.Instruction == Instruction.Call + || node.Instruction == Instruction.Tailcall + || node.Instruction == Instruction.CompareAndSwap + || node.Instruction == Instruction.CompareAndSwap16 + || node.Instruction == Instruction.CompareAndSwap8; + } + + private static bool IsPropagableCompare(Operation operation) + { + return operation.Instruction == Instruction.Compare; + } + + private static bool IsPropagableCopy(Operation operation) + { + if (operation.Instruction != Instruction.Copy) + { + return false; + } + + return operation.Destination.Type == operation.GetSource(0).Type; + } + } +}
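PropagateCopy in miniature, on a toy operation type of its own (not the real IR): every use of the copy's destination is redirected to the copy's source, after which the copy has zero uses and the surrounding loop deletes it as dead.

    using System;
    using System.Collections.Generic;
    using System.Linq;

    class Op
    {
        public string Name;
        public string Dest;
        public List<string> Sources = new();
    }

    class Program
    {
        static void Main()
        {
            var copy = new Op { Name = "Copy", Dest = "%t", Sources = { "%a" } };
            var use = new Op { Name = "Add", Dest = "%r", Sources = { "%t", "%b" } };
            var ops = new List<Op> { copy, use };

            // Redirect every use of the copy's destination to its source.
            foreach (var op in ops)
                for (int i = 0; i < op.Sources.Count; i++)
                    if (op.Sources[i] == copy.Dest)
                        op.Sources[i] = copy.Sources[0];

            // The destination now has no uses, so the copy is dead.
            if (ops.All(op => op == copy || !op.Sources.Contains(copy.Dest)))
                ops.Remove(copy);

            foreach (var op in ops)
                Console.WriteLine($"{op.Dest} = {op.Name} {string.Join(", ", op.Sources)}");
            // %r = Add %a, %b
        }
    }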
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs new file mode 100644 index 00000000..a439d642 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Optimizations/Simplification.cs @@ -0,0 +1,183 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class Simplification + { + public static void RunPass(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Add: + if (operation.GetSource(0).Relocatable || + operation.GetSource(1).Relocatable) + { + break; + } + + TryEliminateBinaryOpComutative(operation, 0); + break; + + case Instruction.BitwiseAnd: + TryEliminateBitwiseAnd(operation); + break; + + case Instruction.BitwiseOr: + TryEliminateBitwiseOr(operation); + break; + + case Instruction.BitwiseExclusiveOr: + TryEliminateBitwiseExclusiveOr(operation); + break; + + case Instruction.ConditionalSelect: + TryEliminateConditionalSelect(operation); + break; + + case Instruction.Divide: + TryEliminateBinaryOpY(operation, 1); + break; + + case Instruction.Multiply: + TryEliminateBinaryOpComutative(operation, 1); + break; + + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + case Instruction.Subtract: + TryEliminateBinaryOpY(operation, 0); + break; + } + } + + private static void TryEliminateBitwiseAnd(Operation operation) + { + // Try to recognize and optimize those 3 patterns (in order): + // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y, + // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000 + Operand x = operation.GetSource(0); + Operand y = operation.GetSource(1); + + if (IsConstEqual(x, AllOnes(x.Type))) + { + operation.TurnIntoCopy(y); + } + else if (IsConstEqual(y, AllOnes(y.Type))) + { + operation.TurnIntoCopy(x); + } + else if (IsConstEqual(x, 0) || IsConstEqual(y, 0)) + { + operation.TurnIntoCopy(Const(x.Type, 0)); + } + } + + private static void TryEliminateBitwiseOr(Operation operation) + { + // Try to recognize and optimize those 3 patterns (in order): + // x | 0x00000000 == x, 0x00000000 | y == y, + // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF + Operand x = operation.GetSource(0); + Operand y = operation.GetSource(1); + + if (IsConstEqual(x, 0)) + { + operation.TurnIntoCopy(y); + } + else if (IsConstEqual(y, 0)) + { + operation.TurnIntoCopy(x); + } + else if (IsConstEqual(x, AllOnes(x.Type)) || IsConstEqual(y, AllOnes(y.Type))) + { + operation.TurnIntoCopy(Const(AllOnes(x.Type))); + } + } + + private static void TryEliminateBitwiseExclusiveOr(Operation operation) + { + // Try to recognize and optimize those 2 patterns (in order): + // x ^ y == 0x00000000 when x == y + // 0x00000000 ^ y == y, x ^ 0x00000000 == x + Operand x = operation.GetSource(0); + Operand y = operation.GetSource(1); + + if (x == y && x.Type.IsInteger()) + { + operation.TurnIntoCopy(Const(x.Type, 0)); + } + else + { + TryEliminateBinaryOpComutative(operation, 0); + } + } + + private static void TryEliminateBinaryOpY(Operation operation, ulong comparand) + { + Operand x = operation.GetSource(0); + Operand y = operation.GetSource(1); + + if (IsConstEqual(y, comparand)) + { + operation.TurnIntoCopy(x); + } + } + + private static void TryEliminateBinaryOpComutative(Operation operation, ulong comparand) + { + Operand x = operation.GetSource(0); + Operand y = operation.GetSource(1); + + if 
(IsConstEqual(x, comparand)) + { + operation.TurnIntoCopy(y); + } + else if (IsConstEqual(y, comparand)) + { + operation.TurnIntoCopy(x); + } + } + + private static void TryEliminateConditionalSelect(Operation operation) + { + Operand cond = operation.GetSource(0); + + if (cond.Kind != OperandKind.Constant) + { + return; + } + + // The condition is constant, we can turn it into a copy, and select + // the source based on the condition value. + int srcIndex = cond.Value != 0 ? 1 : 2; + + Operand source = operation.GetSource(srcIndex); + + operation.TurnIntoCopy(source); + } + + private static bool IsConstEqual(Operand operand, ulong comparand) + { + if (operand.Kind != OperandKind.Constant || !operand.Type.IsInteger()) + { + return false; + } + + return operand.Value == comparand; + } + + private static ulong AllOnes(OperandType type) + { + switch (type) + { + case OperandType.I32: return ~0U; + case OperandType.I64: return ~0UL; + } + + throw new ArgumentException("Invalid operand type \"" + type + "\"."); + } + } +}
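The identities the pass relies on, restated as plain arithmetic facts that can be checked directly (a sanity sketch, not the IR pass itself):

    using System;

    class Program
    {
        static void Main()
        {
            uint x = 0xDEAD_BEEF;
            uint allOnes = ~0u;

            Console.WriteLine((x & allOnes) == x);       // x & ~0 == x
            Console.WriteLine((x & 0u) == 0u);           // x & 0  == 0
            Console.WriteLine((x | 0u) == x);            // x | 0  == x
            Console.WriteLine((x | allOnes) == allOnes); // x | ~0 == ~0
            Console.WriteLine((x ^ x) == 0u);            // x ^ x  == 0
            Console.WriteLine((x - 0u) == x);            // x - 0  == x (TryEliminateBinaryOpY)
        }
    }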
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs new file mode 100644 index 00000000..e94df159 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Optimizations/TailMerge.cs @@ -0,0 +1,83 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.Optimizations +{ + static class TailMerge + { + public static void RunPass(in CompilerContext cctx) + { + ControlFlowGraph cfg = cctx.Cfg; + + BasicBlock mergedReturn = new(cfg.Blocks.Count); + + Operand returnValue; + Operation returnOp; + + if (cctx.FuncReturnType == OperandType.None) + { + returnValue = default; + returnOp = Operation(Instruction.Return, default); + } + else + { + returnValue = cfg.AllocateLocal(cctx.FuncReturnType); + returnOp = Operation(Instruction.Return, default, returnValue); + } + + mergedReturn.Frequency = BasicBlockFrequency.Cold; + mergedReturn.Operations.AddLast(returnOp); + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + Operation op = block.Operations.Last; + + if (op != default && op.Instruction == Instruction.Return) + { + block.Operations.Remove(op); + + if (cctx.FuncReturnType == OperandType.None) + { + PrepareMerge(block, mergedReturn); + } + else + { + Operation copyOp = Operation(Instruction.Copy, returnValue, op.GetSource(0)); + + PrepareMerge(block, mergedReturn).Append(copyOp); + } + } + } + + cfg.Blocks.AddLast(mergedReturn); + cfg.Update(); + } + + private static BasicBlock PrepareMerge(BasicBlock from, BasicBlock to) + { + BasicBlock fromPred = from.Predecessors.Count == 1 ? from.Predecessors[0] : null; + + // If the block is empty, we can try to append to the predecessor and avoid unnecessary jumps. + if (from.Operations.Count == 0 && fromPred != null && fromPred.SuccessorsCount == 1) + { + for (int i = 0; i < fromPred.SuccessorsCount; i++) + { + if (fromPred.GetSuccessor(i) == from) + { + fromPred.SetSuccessor(i, to); + } + } + + // NOTE: `from` becomes unreachable and the call to `cfg.Update()` will remove it. 
+ return fromPred; + } + else + { + from.AddSuccessor(to); + + return from; + } + } + } +} diff --git a/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs new file mode 100644 index 00000000..53f279fb --- /dev/null +++ b/src/ARMeilleure/CodeGen/PreAllocatorCommon.cs @@ -0,0 +1,57 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen +{ + static class PreAllocatorCommon + { + public static void Propagate(ref Span<Operation> buffer, Operand dest, Operand value) + { + ReadOnlySpan<Operation> uses = dest.GetUses(ref buffer); + + foreach (Operation use in uses) + { + for (int srcIndex = 0; srcIndex < use.SourcesCount; srcIndex++) + { + Operand useSrc = use.GetSource(srcIndex); + + if (useSrc == dest) + { + use.SetSource(srcIndex, value); + } + else if (useSrc.Kind == OperandKind.Memory) + { + MemoryOperand memoryOp = useSrc.GetMemory(); + + Operand baseAddr = memoryOp.BaseAddress; + Operand index = memoryOp.Index; + bool changed = false; + + if (baseAddr == dest) + { + baseAddr = value; + changed = true; + } + + if (index == dest) + { + index = value; + changed = true; + } + + if (changed) + { + use.SetSource(srcIndex, MemoryOp( + useSrc.Type, + baseAddr, + index, + memoryOp.Scale, + memoryOp.Displacement)); + } + } + } + } + } + } +} diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs new file mode 100644 index 00000000..43e5c7e2 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/AllocationResult.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + readonly struct AllocationResult + { + public int IntUsedRegisters { get; } + public int VecUsedRegisters { get; } + public int SpillRegionSize { get; } + + public AllocationResult( + int intUsedRegisters, + int vecUsedRegisters, + int spillRegionSize) + { + IntUsedRegisters = intUsedRegisters; + VecUsedRegisters = vecUsedRegisters; + SpillRegionSize = spillRegionSize; + } + } +}
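Propagate does not just rewrite direct uses; locals hiding inside memory operands (as base address or index) are rewritten too, and a fresh memory operand is built when anything changed. The same idea on a toy address type (the real MemoryOperand also carries scale and displacement, kept here for shape):

    using System;

    record MemoryRef(string BaseAddress, string Index, int Scale, int Displacement);

    class Program
    {
        static MemoryRef Propagate(MemoryRef mem, string dest, string value)
        {
            // Replace `dest` wherever it appears inside the address computation.
            string baseAddr = mem.BaseAddress == dest ? value : mem.BaseAddress;
            string index = mem.Index == dest ? value : mem.Index;

            return (baseAddr != mem.BaseAddress || index != mem.Index)
                ? mem with { BaseAddress = baseAddr, Index = index }
                : mem;
        }

        static void Main()
        {
            var mem = new MemoryRef("%t", "%i", 8, 16);
            Console.WriteLine(Propagate(mem, "%t", "%arg0"));
            // MemoryRef { BaseAddress = %arg0, Index = %i, Scale = 8, Displacement = 16 }
        }
    }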
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs new file mode 100644 index 00000000..587b1a02 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/CopyResolver.cs @@ -0,0 +1,259 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Collections.Generic; + +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + class CopyResolver + { + private class ParallelCopy + { + private readonly struct Copy + { + public Register Dest { get; } + public Register Source { get; } + + public OperandType Type { get; } + + public Copy(Register dest, Register source, OperandType type) + { + Dest = dest; + Source = source; + Type = type; + } + } + + private readonly List<Copy> _copies; + + public int Count => _copies.Count; + + public ParallelCopy() + { + _copies = new List<Copy>(); + } + + public void AddCopy(Register dest, Register source, OperandType type) + { + _copies.Add(new Copy(dest, source, type)); + } + + public void Sequence(List<Operation> sequence) + { + Dictionary<Register, Register> locations = new Dictionary<Register, Register>(); + Dictionary<Register, Register> sources = new Dictionary<Register, Register>(); + + Dictionary<Register, OperandType> types = new Dictionary<Register, OperandType>(); + + Queue<Register> pendingQueue = new Queue<Register>(); + Queue<Register> readyQueue = new Queue<Register>(); + + foreach (Copy copy in _copies) + { + locations[copy.Source] = copy.Source; + sources[copy.Dest] = copy.Source; + types[copy.Dest] = copy.Type; + + pendingQueue.Enqueue(copy.Dest); + } + + foreach (Copy copy in _copies) + { + // If the destination is not used anywhere, we can assign it immediately. + if (!locations.ContainsKey(copy.Dest)) + { + readyQueue.Enqueue(copy.Dest); + } + } + + while (pendingQueue.TryDequeue(out Register current)) + { + Register copyDest; + Register origSource; + Register copySource; + + while (readyQueue.TryDequeue(out copyDest)) + { + origSource = sources[copyDest]; + copySource = locations[origSource]; + + OperandType type = types[copyDest]; + + EmitCopy(sequence, GetRegister(copyDest, type), GetRegister(copySource, type)); + + locations[origSource] = copyDest; + + if (origSource == copySource && sources.ContainsKey(origSource)) + { + readyQueue.Enqueue(origSource); + } + } + + copyDest = current; + origSource = sources[copyDest]; + copySource = locations[origSource]; + + if (copyDest != copySource) + { + OperandType type = types[copyDest]; + + type = type.IsInteger() ? OperandType.I64 : OperandType.V128; + + EmitXorSwap(sequence, GetRegister(copyDest, type), GetRegister(copySource, type)); + + locations[origSource] = copyDest; + + Register swapOther = copySource; + + if (copyDest != locations[sources[copySource]]) + { + // Find the other swap destination register. + // To do that, we search all the pending registers, and pick + // the one where the copy source register is equal to the + // current destination register being processed (copyDest). + foreach (Register pending in pendingQueue) + { + // Is this a copy of pending <- copyDest? 
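The situation being untangled here is a copy cycle such as { x1 <- x2, x2 <- x1 }: no ordering of plain moves is correct, so the resolver exchanges the registers instead. The three-XOR swap EmitXorSwap emits, verified standalone (register names illustrative); the search over pending copies continues just below.

    using System;

    class Program
    {
        static void Main()
        {
            // XOR exchange: swaps two registers without a scratch register.
            ulong x = 0x1111;
            ulong y = 0x2222;

            x ^= y;
            y ^= x;
            x ^= y;

            Console.WriteLine($"{x:x} {y:x}"); // 2222 1111: values exchanged
        }
    }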
+ if (copyDest == locations[sources[pending]]) + { + swapOther = pending; + + break; + } + } + } + + // The value that was previously at "copyDest" now lives on + // "copySource" thanks to the swap, now we need to update the + // location for the next copy that is supposed to copy the value + // that used to live on "copyDest". + locations[sources[swapOther]] = copySource; + } + } + } + + private static void EmitCopy(List<Operation> sequence, Operand x, Operand y) + { + sequence.Add(Operation(Instruction.Copy, x, y)); + } + + private static void EmitXorSwap(List<Operation> sequence, Operand x, Operand y) + { + sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y)); + sequence.Add(Operation(Instruction.BitwiseExclusiveOr, y, y, x)); + sequence.Add(Operation(Instruction.BitwiseExclusiveOr, x, x, y)); + } + } + + private Queue<Operation> _fillQueue = null; + private Queue<Operation> _spillQueue = null; + private ParallelCopy _parallelCopy = null; + + public bool HasCopy { get; private set; } + + public void AddSplit(LiveInterval left, LiveInterval right) + { + if (left.Local != right.Local) + { + throw new ArgumentException("Intervals of different variables are not allowed."); + } + + OperandType type = left.Local.Type; + + if (left.IsSpilled && !right.IsSpilled) + { + // Move from the stack to a register. + AddSplitFill(left, right, type); + } + else if (!left.IsSpilled && right.IsSpilled) + { + // Move from a register to the stack. + AddSplitSpill(left, right, type); + } + else if (!left.IsSpilled && !right.IsSpilled && left.Register != right.Register) + { + // Move from one register to another. + AddSplitCopy(left, right, type); + } + else if (left.SpillOffset != right.SpillOffset) + { + // This would be the stack-to-stack move case, but this is not supported. + throw new ArgumentException("Both intervals were spilled."); + } + } + + private void AddSplitFill(LiveInterval left, LiveInterval right, OperandType type) + { + if (_fillQueue == null) + { + _fillQueue = new Queue<Operation>(); + } + + Operand register = GetRegister(right.Register, type); + Operand offset = Const(left.SpillOffset); + + _fillQueue.Enqueue(Operation(Instruction.Fill, register, offset)); + + HasCopy = true; + } + + private void AddSplitSpill(LiveInterval left, LiveInterval right, OperandType type) + { + if (_spillQueue == null) + { + _spillQueue = new Queue<Operation>(); + } + + Operand offset = Const(right.SpillOffset); + Operand register = GetRegister(left.Register, type); + + _spillQueue.Enqueue(Operation(Instruction.Spill, default, offset, register)); + + HasCopy = true; + } + + private void AddSplitCopy(LiveInterval left, LiveInterval right, OperandType type) + { + if (_parallelCopy == null) + { + _parallelCopy = new ParallelCopy(); + } + + _parallelCopy.AddCopy(right.Register, left.Register, type); + + HasCopy = true; + } + + public Operation[] Sequence() + { + List<Operation> sequence = new List<Operation>(); + + if (_spillQueue != null) + { + while (_spillQueue.TryDequeue(out Operation spillOp)) + { + sequence.Add(spillOp); + } + } + + _parallelCopy?.Sequence(sequence); + + if (_fillQueue != null) + { + while (_fillQueue.TryDequeue(out Operation fillOp)) + { + sequence.Add(fillOp); + } + } + + return sequence.ToArray(); + } + + private static Operand GetRegister(Register reg, OperandType type) + { + return Register(reg.Index, reg.Type, type); + } + } +}
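The emission order in Sequence is deliberate: stack stores come first, while the spilled values are still in registers; register-to-register parallel copies run next; fills come last, once their target registers have been vacated. A compact trace of that ordering with toy operations (strings, not the IR):

    using System;
    using System.Collections.Generic;

    class Program
    {
        static void Main()
        {
            var spills = new Queue<string>(new[] { "spill [sp+0x10] <- x1" });
            var copies = new List<string> { "copy x1 <- x2" };
            var fills = new Queue<string>(new[] { "fill x2 <- [sp+0x18]" });

            var sequence = new List<string>();
            while (spills.TryDequeue(out var s)) sequence.Add(s); // x1 saved before being rewritten
            sequence.AddRange(copies);                            // now x1 may be overwritten
            while (fills.TryDequeue(out var f)) sequence.Add(f);  // x2 reloaded only after its copy ran

            sequence.ForEach(Console.WriteLine);
        }
    }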
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs new file mode 100644 index 00000000..25952c77 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/HybridAllocator.cs @@ -0,0 +1,454 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Numerics; +using System.Runtime.CompilerServices; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + class HybridAllocator : IRegisterAllocator + { + private readonly struct BlockInfo + { + public bool HasCall { get; } + + public int IntFixedRegisters { get; } + public int VecFixedRegisters { get; } + + public BlockInfo(bool hasCall, int intFixedRegisters, int vecFixedRegisters) + { + HasCall = hasCall; + IntFixedRegisters = intFixedRegisters; + VecFixedRegisters = vecFixedRegisters; + } + } + + private struct LocalInfo + { + public int Uses { get; set; } + public int UsesAllocated { get; set; } + public int Sequence { get; set; } + public Operand Temp { get; set; } + public Operand Register { get; set; } + public Operand SpillOffset { get; set; } + public OperandType Type { get; } + + private int _first; + private int _last; + + public bool IsBlockLocal => _first == _last; + + public LocalInfo(OperandType type, int uses, int blkIndex) + { + Uses = uses; + Type = type; + + UsesAllocated = 0; + Sequence = 0; + Temp = default; + Register = default; + SpillOffset = default; + + _first = -1; + _last = -1; + + SetBlockIndex(blkIndex); + } + + public void SetBlockIndex(int blkIndex) + { + if (_first == -1 || blkIndex < _first) + { + _first = blkIndex; + } + + if (_last == -1 || blkIndex > _last) + { + _last = blkIndex; + } + } + } + + private const int MaxIROperands = 4; + // The "visited" state is stored in the MSB of the local's value. + private const ulong VisitedMask = 1ul << 63; + + private BlockInfo[] _blockInfo; + private LocalInfo[] _localInfo; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsVisited(Operand local) + { + Debug.Assert(local.Kind == OperandKind.LocalVariable); + + return (local.GetValueUnsafe() & VisitedMask) != 0; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void SetVisited(Operand local) + { + Debug.Assert(local.Kind == OperandKind.LocalVariable); + + local.GetValueUnsafe() |= VisitedMask; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private ref LocalInfo GetLocalInfo(Operand local) + { + Debug.Assert(local.Kind == OperandKind.LocalVariable); + Debug.Assert(IsVisited(local), "Local variable not visited. 
Used before defined?"); + + return ref _localInfo[(uint)local.GetValueUnsafe() - 1]; + } + + public AllocationResult RunPass(ControlFlowGraph cfg, StackAllocator stackAlloc, RegisterMasks regMasks) + { + int intUsedRegisters = 0; + int vecUsedRegisters = 0; + + int intFreeRegisters = regMasks.IntAvailableRegisters; + int vecFreeRegisters = regMasks.VecAvailableRegisters; + + _blockInfo = new BlockInfo[cfg.Blocks.Count]; + _localInfo = new LocalInfo[cfg.Blocks.Count * 3]; + + int localInfoCount = 0; + + for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + int intFixedRegisters = 0; + int vecFixedRegisters = 0; + + bool hasCall = false; + + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + if (node.Instruction == Instruction.Call) + { + hasCall = true; + } + + foreach (Operand source in node.SourcesUnsafe) + { + if (source.Kind == OperandKind.LocalVariable) + { + GetLocalInfo(source).SetBlockIndex(block.Index); + } + else if (source.Kind == OperandKind.Memory) + { + MemoryOperand memOp = source.GetMemory(); + + if (memOp.BaseAddress != default) + { + GetLocalInfo(memOp.BaseAddress).SetBlockIndex(block.Index); + } + + if (memOp.Index != default) + { + GetLocalInfo(memOp.Index).SetBlockIndex(block.Index); + } + } + } + + foreach (Operand dest in node.DestinationsUnsafe) + { + if (dest.Kind == OperandKind.LocalVariable) + { + if (IsVisited(dest)) + { + GetLocalInfo(dest).SetBlockIndex(block.Index); + } + else + { + dest.NumberLocal(++localInfoCount); + + if (localInfoCount > _localInfo.Length) + { + Array.Resize(ref _localInfo, localInfoCount * 2); + } + + SetVisited(dest); + GetLocalInfo(dest) = new LocalInfo(dest.Type, UsesCount(dest), block.Index); + } + } + else if (dest.Kind == OperandKind.Register) + { + if (dest.Type.IsInteger()) + { + intFixedRegisters |= 1 << dest.GetRegister().Index; + } + else + { + vecFixedRegisters |= 1 << dest.GetRegister().Index; + } + } + } + } + + _blockInfo[block.Index] = new BlockInfo(hasCall, intFixedRegisters, vecFixedRegisters); + } + + int sequence = 0; + + for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + ref BlockInfo blkInfo = ref _blockInfo[block.Index]; + + int intLocalFreeRegisters = intFreeRegisters & ~blkInfo.IntFixedRegisters; + int vecLocalFreeRegisters = vecFreeRegisters & ~blkInfo.VecFixedRegisters; + + int intCallerSavedRegisters = blkInfo.HasCall ? regMasks.IntCallerSavedRegisters : 0; + int vecCallerSavedRegisters = blkInfo.HasCall ? 
regMasks.VecCallerSavedRegisters : 0; + + int intSpillTempRegisters = SelectSpillTemps( + intCallerSavedRegisters & ~blkInfo.IntFixedRegisters, + intLocalFreeRegisters); + int vecSpillTempRegisters = SelectSpillTemps( + vecCallerSavedRegisters & ~blkInfo.VecFixedRegisters, + vecLocalFreeRegisters); + + intLocalFreeRegisters &= ~(intSpillTempRegisters | intCallerSavedRegisters); + vecLocalFreeRegisters &= ~(vecSpillTempRegisters | vecCallerSavedRegisters); + + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + int intLocalUse = 0; + int vecLocalUse = 0; + + Operand AllocateRegister(Operand local) + { + ref LocalInfo info = ref GetLocalInfo(local); + + info.UsesAllocated++; + + Debug.Assert(info.UsesAllocated <= info.Uses); + + if (info.Register != default) + { + if (info.UsesAllocated == info.Uses) + { + Register reg = info.Register.GetRegister(); + + if (local.Type.IsInteger()) + { + intLocalFreeRegisters |= 1 << reg.Index; + } + else + { + vecLocalFreeRegisters |= 1 << reg.Index; + } + } + + return info.Register; + } + else + { + Operand temp = info.Temp; + + if (temp == default || info.Sequence != sequence) + { + temp = local.Type.IsInteger() + ? GetSpillTemp(local, intSpillTempRegisters, ref intLocalUse) + : GetSpillTemp(local, vecSpillTempRegisters, ref vecLocalUse); + + info.Sequence = sequence; + info.Temp = temp; + } + + Operation fillOp = Operation(Instruction.Fill, temp, info.SpillOffset); + + block.Operations.AddBefore(node, fillOp); + + return temp; + } + } + + bool folded = false; + + // If operation is a copy of a local and that local is living on the stack, we turn the copy into + // a fill, instead of inserting a fill before it. + if (node.Instruction == Instruction.Copy) + { + Operand source = node.GetSource(0); + + if (source.Kind == OperandKind.LocalVariable) + { + ref LocalInfo info = ref GetLocalInfo(source); + + if (info.Register == default) + { + Operation fillOp = Operation(Instruction.Fill, node.Destination, info.SpillOffset); + + block.Operations.AddBefore(node, fillOp); + block.Operations.Remove(node); + + node = fillOp; + + folded = true; + } + } + } + + if (!folded) + { + foreach (ref Operand source in node.SourcesUnsafe) + { + if (source.Kind == OperandKind.LocalVariable) + { + source = AllocateRegister(source); + } + else if (source.Kind == OperandKind.Memory) + { + MemoryOperand memOp = source.GetMemory(); + + if (memOp.BaseAddress != default) + { + memOp.BaseAddress = AllocateRegister(memOp.BaseAddress); + } + + if (memOp.Index != default) + { + memOp.Index = AllocateRegister(memOp.Index); + } + } + } + } + + int intLocalAsg = 0; + int vecLocalAsg = 0; + + foreach (ref Operand dest in node.DestinationsUnsafe) + { + if (dest.Kind != OperandKind.LocalVariable) + { + continue; + } + + ref LocalInfo info = ref GetLocalInfo(dest); + + if (info.UsesAllocated == 0) + { + int mask = dest.Type.IsInteger() + ? 
intLocalFreeRegisters + : vecLocalFreeRegisters; + + if (info.IsBlockLocal && mask != 0) + { + int selectedReg = BitOperations.TrailingZeroCount(mask); + + info.Register = Register(selectedReg, info.Type.ToRegisterType(), info.Type); + + if (dest.Type.IsInteger()) + { + intLocalFreeRegisters &= ~(1 << selectedReg); + intUsedRegisters |= 1 << selectedReg; + } + else + { + vecLocalFreeRegisters &= ~(1 << selectedReg); + vecUsedRegisters |= 1 << selectedReg; + } + } + else + { + info.Register = default; + info.SpillOffset = Const(stackAlloc.Allocate(dest.Type.GetSizeInBytes())); + } + } + + info.UsesAllocated++; + + Debug.Assert(info.UsesAllocated <= info.Uses); + + if (info.Register != default) + { + dest = info.Register; + } + else + { + Operand temp = info.Temp; + + if (temp == default || info.Sequence != sequence) + { + temp = dest.Type.IsInteger() + ? GetSpillTemp(dest, intSpillTempRegisters, ref intLocalAsg) + : GetSpillTemp(dest, vecSpillTempRegisters, ref vecLocalAsg); + + info.Sequence = sequence; + info.Temp = temp; + } + + dest = temp; + + Operation spillOp = Operation(Instruction.Spill, default, info.SpillOffset, temp); + + block.Operations.AddAfter(node, spillOp); + + node = spillOp; + } + } + + sequence++; + + intUsedRegisters |= intLocalAsg | intLocalUse; + vecUsedRegisters |= vecLocalAsg | vecLocalUse; + } + } + + return new AllocationResult(intUsedRegisters, vecUsedRegisters, stackAlloc.TotalSize); + } + + private static int SelectSpillTemps(int mask0, int mask1) + { + int selection = 0; + int count = 0; + + while (count < MaxIROperands && mask0 != 0) + { + int mask = mask0 & -mask0; + + selection |= mask; + + mask0 &= ~mask; + + count++; + } + + while (count < MaxIROperands && mask1 != 0) + { + int mask = mask1 & -mask1; + + selection |= mask; + + mask1 &= ~mask; + + count++; + } + + Debug.Assert(count == MaxIROperands, "No enough registers for spill temps."); + + return selection; + } + + private static Operand GetSpillTemp(Operand local, int freeMask, ref int useMask) + { + int selectedReg = BitOperations.TrailingZeroCount(freeMask & ~useMask); + + useMask |= 1 << selectedReg; + + return Register(selectedReg, local.Type.ToRegisterType(), local.Type); + } + + private static int UsesCount(Operand local) + { + return local.AssignmentsCount + local.UsesCount; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs new file mode 100644 index 00000000..8f236c25 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/IRegisterAllocator.cs @@ -0,0 +1,12 @@ +using ARMeilleure.Translation; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + interface IRegisterAllocator + { + AllocationResult RunPass( + ControlFlowGraph cfg, + StackAllocator stackAlloc, + RegisterMasks regMasks); + } +}
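[Editor's note] Both HybridAllocator and LinearScanAllocator (introduced just below) sit behind this interface, so the rest of the pipeline is indifferent to which one runs. A hypothetical driver follows; the 'optimize' flag and the selection policy are invented for this sketch, and since the types are internal it would only compile inside the ARMeilleure assembly:

using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.Translation;

static class RegAllocDriver
{
    // Illustrates only that the two allocators are interchangeable behind
    // IRegisterAllocator; the policy shown is not necessarily the real one.
    public static AllocationResult Allocate(
        ControlFlowGraph cfg,
        StackAllocator stackAlloc,
        RegisterMasks masks,
        bool optimize)
    {
        IRegisterAllocator allocator = optimize
            ? new LinearScanAllocator() // Better assignments, slower pass.
            : new HybridAllocator();    // Faster single pass, more spills.

        return allocator.RunPass(cfg, stackAlloc, masks);
    }
}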
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs new file mode 100644 index 00000000..d80157af --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LinearScanAllocator.cs @@ -0,0 +1,1101 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Numerics; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + // Based on: + // "Linear Scan Register Allocation for the Java(tm) HotSpot Client Compiler". + // http://www.christianwimmer.at/Publications/Wimmer04a/Wimmer04a.pdf + class LinearScanAllocator : IRegisterAllocator + { + private const int InstructionGap = 2; + private const int InstructionGapMask = InstructionGap - 1; + + private HashSet<int> _blockEdges; + private LiveRange[] _blockRanges; + private BitMap[] _blockLiveIn; + + private List<LiveInterval> _intervals; + private LiveInterval[] _parentIntervals; + + private List<(IntrusiveList<Operation>, Operation)> _operationNodes; + private int _operationsCount; + + private class AllocationContext + { + public RegisterMasks Masks { get; } + + public StackAllocator StackAlloc { get; } + + public BitMap Active { get; } + public BitMap Inactive { get; } + + public int IntUsedRegisters { get; set; } + public int VecUsedRegisters { get; set; } + + private readonly int[] _intFreePositions; + private readonly int[] _vecFreePositions; + private readonly int _intFreePositionsCount; + private readonly int _vecFreePositionsCount; + + public AllocationContext(StackAllocator stackAlloc, RegisterMasks masks, int intervalsCount) + { + StackAlloc = stackAlloc; + Masks = masks; + + Active = new BitMap(Allocators.Default, intervalsCount); + Inactive = new BitMap(Allocators.Default, intervalsCount); + + PopulateFreePositions(RegisterType.Integer, out _intFreePositions, out _intFreePositionsCount); + PopulateFreePositions(RegisterType.Vector, out _vecFreePositions, out _vecFreePositionsCount); + + void PopulateFreePositions(RegisterType type, out int[] positions, out int count) + { + positions = new int[masks.RegistersCount]; + count = BitOperations.PopCount((uint)masks.GetAvailableRegisters(type)); + + int mask = masks.GetAvailableRegisters(type); + + for (int i = 0; i < positions.Length; i++) + { + if ((mask & (1 << i)) != 0) + { + positions[i] = int.MaxValue; + } + } + } + } + + public void GetFreePositions(RegisterType type, in Span<int> positions, out int count) + { + if (type == RegisterType.Integer) + { + _intFreePositions.CopyTo(positions); + + count = _intFreePositionsCount; + } + else + { + Debug.Assert(type == RegisterType.Vector); + + _vecFreePositions.CopyTo(positions); + + count = _vecFreePositionsCount; + } + } + + public void MoveActiveToInactive(int bit) + { + Move(Active, Inactive, bit); + } + + public void MoveInactiveToActive(int bit) + { + Move(Inactive, Active, bit); + } + + private static void Move(BitMap source, BitMap dest, int bit) + { + source.Clear(bit); + + dest.Set(bit); + } + } + + public AllocationResult RunPass( + ControlFlowGraph cfg, + StackAllocator stackAlloc, + RegisterMasks regMasks) + { + NumberLocals(cfg, regMasks.RegistersCount); + + var context = new AllocationContext(stackAlloc, regMasks, _intervals.Count); + + BuildIntervals(cfg, context); + + for (int index = 0; index < _intervals.Count; index++) + { + LiveInterval 
current = _intervals[index]; + + if (current.IsEmpty) + { + continue; + } + + if (current.IsFixed) + { + context.Active.Set(index); + + if (current.IsFixedAndUsed) + { + if (current.Register.Type == RegisterType.Integer) + { + context.IntUsedRegisters |= 1 << current.Register.Index; + } + else /* if (interval.Register.Type == RegisterType.Vector) */ + { + context.VecUsedRegisters |= 1 << current.Register.Index; + } + } + + continue; + } + + AllocateInterval(context, current, index, regMasks.RegistersCount); + } + + for (int index = regMasks.RegistersCount * 2; index < _intervals.Count; index++) + { + if (!_intervals[index].IsSpilled) + { + ReplaceLocalWithRegister(_intervals[index]); + } + } + + InsertSplitCopies(); + InsertSplitCopiesAtEdges(cfg); + + return new AllocationResult(context.IntUsedRegisters, context.VecUsedRegisters, context.StackAlloc.TotalSize); + } + + private void AllocateInterval(AllocationContext context, LiveInterval current, int cIndex, int registersCount) + { + // Check active intervals that already ended. + foreach (int iIndex in context.Active) + { + LiveInterval interval = _intervals[iIndex]; + + interval.Forward(current.GetStart()); + + if (interval.GetEnd() < current.GetStart()) + { + context.Active.Clear(iIndex); + } + else if (!interval.Overlaps(current.GetStart())) + { + context.MoveActiveToInactive(iIndex); + } + } + + // Check inactive intervals that already ended or were reactivated. + foreach (int iIndex in context.Inactive) + { + LiveInterval interval = _intervals[iIndex]; + + interval.Forward(current.GetStart()); + + if (interval.GetEnd() < current.GetStart()) + { + context.Inactive.Clear(iIndex); + } + else if (interval.Overlaps(current.GetStart())) + { + context.MoveInactiveToActive(iIndex); + } + } + + if (!TryAllocateRegWithoutSpill(context, current, cIndex, registersCount)) + { + AllocateRegWithSpill(context, current, cIndex, registersCount); + } + } + + private bool TryAllocateRegWithoutSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount) + { + RegisterType regType = current.Local.Type.ToRegisterType(); + + Span<int> freePositions = stackalloc int[registersCount]; + + context.GetFreePositions(regType, freePositions, out int freePositionsCount); + + foreach (int iIndex in context.Active) + { + LiveInterval interval = _intervals[iIndex]; + Register reg = interval.Register; + + if (reg.Type == regType) + { + freePositions[reg.Index] = 0; + freePositionsCount--; + } + } + + // If all registers are already active, return early. No point in inspecting the inactive set to look for + // holes. + if (freePositionsCount == 0) + { + return false; + } + + foreach (int iIndex in context.Inactive) + { + LiveInterval interval = _intervals[iIndex]; + Register reg = interval.Register; + + ref int freePosition = ref freePositions[reg.Index]; + + if (reg.Type == regType && freePosition != 0) + { + int overlapPosition = interval.GetOverlapPosition(current); + + if (overlapPosition != LiveInterval.NotFound && freePosition > overlapPosition) + { + freePosition = overlapPosition; + } + } + } + + int selectedReg = GetHighestValueIndex(freePositions); + int selectedNextUse = freePositions[selectedReg]; + + // Intervals starts and ends at odd positions, unless they span an entire + // block, in this case they will have ranges at a even position. 
+ // When a interval is loaded from the stack to a register, we can only + // do the split at a odd position, because otherwise the split interval + // that is inserted on the list to be processed may clobber a register + // used by the instruction at the same position as the split. + // The problem only happens when a interval ends exactly at this instruction, + // because otherwise they would interfere, and the register wouldn't be selected. + // When the interval is aligned and the above happens, there's no problem as + // the instruction that is actually with the last use is the one + // before that position. + selectedNextUse &= ~InstructionGapMask; + + if (selectedNextUse <= current.GetStart()) + { + return false; + } + else if (selectedNextUse < current.GetEnd()) + { + LiveInterval splitChild = current.Split(selectedNextUse); + + if (splitChild.UsesCount != 0) + { + Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); + + InsertInterval(splitChild, registersCount); + } + else + { + Spill(context, splitChild); + } + } + + current.Register = new Register(selectedReg, regType); + + if (regType == RegisterType.Integer) + { + context.IntUsedRegisters |= 1 << selectedReg; + } + else /* if (regType == RegisterType.Vector) */ + { + context.VecUsedRegisters |= 1 << selectedReg; + } + + context.Active.Set(cIndex); + + return true; + } + + private void AllocateRegWithSpill(AllocationContext context, LiveInterval current, int cIndex, int registersCount) + { + RegisterType regType = current.Local.Type.ToRegisterType(); + + Span<int> usePositions = stackalloc int[registersCount]; + Span<int> blockedPositions = stackalloc int[registersCount]; + + context.GetFreePositions(regType, usePositions, out _); + context.GetFreePositions(regType, blockedPositions, out _); + + foreach (int iIndex in context.Active) + { + LiveInterval interval = _intervals[iIndex]; + Register reg = interval.Register; + + if (reg.Type == regType) + { + ref int usePosition = ref usePositions[reg.Index]; + ref int blockedPosition = ref blockedPositions[reg.Index]; + + if (interval.IsFixed) + { + usePosition = 0; + blockedPosition = 0; + } + else + { + int nextUse = interval.NextUseAfter(current.GetStart()); + + if (nextUse != LiveInterval.NotFound && usePosition > nextUse) + { + usePosition = nextUse; + } + } + } + } + + foreach (int iIndex in context.Inactive) + { + LiveInterval interval = _intervals[iIndex]; + Register reg = interval.Register; + + if (reg.Type == regType) + { + ref int usePosition = ref usePositions[reg.Index]; + ref int blockedPosition = ref blockedPositions[reg.Index]; + + if (interval.IsFixed) + { + int overlapPosition = interval.GetOverlapPosition(current); + + if (overlapPosition != LiveInterval.NotFound) + { + blockedPosition = Math.Min(blockedPosition, overlapPosition); + usePosition = Math.Min(usePosition, overlapPosition); + } + } + else if (interval.Overlaps(current)) + { + int nextUse = interval.NextUseAfter(current.GetStart()); + + if (nextUse != LiveInterval.NotFound && usePosition > nextUse) + { + usePosition = nextUse; + } + } + } + } + + int selectedReg = GetHighestValueIndex(usePositions); + int currentFirstUse = current.FirstUse(); + + Debug.Assert(currentFirstUse >= 0, "Current interval has no uses."); + + if (usePositions[selectedReg] < currentFirstUse) + { + // All intervals on inactive and active are being used before current, + // so spill the current interval. 
+ Debug.Assert(currentFirstUse > current.GetStart(), "Trying to spill a interval currently being used."); + + LiveInterval splitChild = current.Split(currentFirstUse); + + Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); + + InsertInterval(splitChild, registersCount); + + Spill(context, current); + } + else if (blockedPositions[selectedReg] > current.GetEnd()) + { + // Spill made the register available for the entire current lifetime, + // so we only need to split the intervals using the selected register. + current.Register = new Register(selectedReg, regType); + + SplitAndSpillOverlappingIntervals(context, current, registersCount); + + context.Active.Set(cIndex); + } + else + { + // There are conflicts even after spill due to the use of fixed registers + // that can't be spilled, so we need to also split current at the point of + // the first fixed register use. + current.Register = new Register(selectedReg, regType); + + int splitPosition = blockedPositions[selectedReg] & ~InstructionGapMask; + + Debug.Assert(splitPosition > current.GetStart(), "Trying to split a interval at a invalid position."); + + LiveInterval splitChild = current.Split(splitPosition); + + if (splitChild.UsesCount != 0) + { + Debug.Assert(splitChild.GetStart() > current.GetStart(), "Split interval has an invalid start position."); + + InsertInterval(splitChild, registersCount); + } + else + { + Spill(context, splitChild); + } + + SplitAndSpillOverlappingIntervals(context, current, registersCount); + + context.Active.Set(cIndex); + } + } + + private static int GetHighestValueIndex(Span<int> span) + { + int highest = int.MinValue; + + int selected = 0; + + for (int index = 0; index < span.Length; index++) + { + int current = span[index]; + + if (highest < current) + { + highest = current; + selected = index; + + if (current == int.MaxValue) + { + break; + } + } + } + + return selected; + } + + private void SplitAndSpillOverlappingIntervals(AllocationContext context, LiveInterval current, int registersCount) + { + foreach (int iIndex in context.Active) + { + LiveInterval interval = _intervals[iIndex]; + + if (!interval.IsFixed && interval.Register == current.Register) + { + SplitAndSpillOverlappingInterval(context, current, interval, registersCount); + + context.Active.Clear(iIndex); + } + } + + foreach (int iIndex in context.Inactive) + { + LiveInterval interval = _intervals[iIndex]; + + if (!interval.IsFixed && interval.Register == current.Register && interval.Overlaps(current)) + { + SplitAndSpillOverlappingInterval(context, current, interval, registersCount); + + context.Inactive.Clear(iIndex); + } + } + } + + private void SplitAndSpillOverlappingInterval( + AllocationContext context, + LiveInterval current, + LiveInterval interval, + int registersCount) + { + // If there's a next use after the start of the current interval, + // we need to split the spilled interval twice, and re-insert it + // on the "pending" list to ensure that it will get a new register + // on that use position. 
+ int nextUse = interval.NextUseAfter(current.GetStart()); + + LiveInterval splitChild; + + if (interval.GetStart() < current.GetStart()) + { + splitChild = interval.Split(current.GetStart()); + } + else + { + splitChild = interval; + } + + if (nextUse != -1) + { + Debug.Assert(nextUse > current.GetStart(), "Trying to spill a interval currently being used."); + + if (nextUse > splitChild.GetStart()) + { + LiveInterval right = splitChild.Split(nextUse); + + Spill(context, splitChild); + + splitChild = right; + } + + InsertInterval(splitChild, registersCount); + } + else + { + Spill(context, splitChild); + } + } + + private void InsertInterval(LiveInterval interval, int registersCount) + { + Debug.Assert(interval.UsesCount != 0, "Trying to insert a interval without uses."); + Debug.Assert(!interval.IsEmpty, "Trying to insert a empty interval."); + Debug.Assert(!interval.IsSpilled, "Trying to insert a spilled interval."); + + int startIndex = registersCount * 2; + + int insertIndex = _intervals.BinarySearch(startIndex, _intervals.Count - startIndex, interval, null); + + if (insertIndex < 0) + { + insertIndex = ~insertIndex; + } + + _intervals.Insert(insertIndex, interval); + } + + private void Spill(AllocationContext context, LiveInterval interval) + { + Debug.Assert(!interval.IsFixed, "Trying to spill a fixed interval."); + Debug.Assert(interval.UsesCount == 0, "Trying to spill a interval with uses."); + + // We first check if any of the siblings were spilled, if so we can reuse + // the stack offset. Otherwise, we allocate a new space on the stack. + // This prevents stack-to-stack copies being necessary for a split interval. + if (!interval.TrySpillWithSiblingOffset()) + { + interval.Spill(context.StackAlloc.Allocate(interval.Local.Type)); + } + } + + private void InsertSplitCopies() + { + Dictionary<int, CopyResolver> copyResolvers = new Dictionary<int, CopyResolver>(); + + CopyResolver GetCopyResolver(int position) + { + if (!copyResolvers.TryGetValue(position, out CopyResolver copyResolver)) + { + copyResolver = new CopyResolver(); + + copyResolvers.Add(position, copyResolver); + } + + return copyResolver; + } + + foreach (LiveInterval interval in _intervals.Where(x => x.IsSplit)) + { + LiveInterval previous = interval; + + foreach (LiveInterval splitChild in interval.SplitChildren()) + { + int splitPosition = splitChild.GetStart(); + + if (!_blockEdges.Contains(splitPosition) && previous.GetEnd() == splitPosition) + { + GetCopyResolver(splitPosition).AddSplit(previous, splitChild); + } + + previous = splitChild; + } + } + + foreach (KeyValuePair<int, CopyResolver> kv in copyResolvers) + { + CopyResolver copyResolver = kv.Value; + + if (!copyResolver.HasCopy) + { + continue; + } + + int splitPosition = kv.Key; + + (IntrusiveList<Operation> nodes, Operation node) = GetOperationNode(splitPosition); + + Operation[] sequence = copyResolver.Sequence(); + + nodes.AddBefore(node, sequence[0]); + + node = sequence[0]; + + for (int index = 1; index < sequence.Length; index++) + { + nodes.AddAfter(node, sequence[index]); + + node = sequence[index]; + } + } + } + + private void InsertSplitCopiesAtEdges(ControlFlowGraph cfg) + { + int blocksCount = cfg.Blocks.Count; + + bool IsSplitEdgeBlock(BasicBlock block) + { + return block.Index >= blocksCount; + } + + // Reset iterators to beginning because GetSplitChild depends on the state of the iterator. 
+ foreach (LiveInterval interval in _intervals) + { + interval.Reset(); + } + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + if (IsSplitEdgeBlock(block)) + { + continue; + } + + bool hasSingleOrNoSuccessor = block.SuccessorsCount <= 1; + + for (int i = 0; i < block.SuccessorsCount; i++) + { + BasicBlock successor = block.GetSuccessor(i); + + int succIndex = successor.Index; + + // If the current node is a split node, then the actual successor node + // (the successor before the split) should be right after it. + if (IsSplitEdgeBlock(successor)) + { + succIndex = successor.GetSuccessor(0).Index; + } + + CopyResolver copyResolver = null; + + foreach (int iIndex in _blockLiveIn[succIndex]) + { + LiveInterval interval = _parentIntervals[iIndex]; + + if (!interval.IsSplit) + { + continue; + } + + int lEnd = _blockRanges[block.Index].End - 1; + int rStart = _blockRanges[succIndex].Start; + + LiveInterval left = interval.GetSplitChild(lEnd); + LiveInterval right = interval.GetSplitChild(rStart); + + if (left != default && right != default && left != right) + { + if (copyResolver == null) + { + copyResolver = new CopyResolver(); + } + + copyResolver.AddSplit(left, right); + } + } + + if (copyResolver == null || !copyResolver.HasCopy) + { + continue; + } + + Operation[] sequence = copyResolver.Sequence(); + + if (hasSingleOrNoSuccessor) + { + foreach (Operation operation in sequence) + { + block.Append(operation); + } + } + else if (successor.Predecessors.Count == 1) + { + successor.Operations.AddFirst(sequence[0]); + + Operation prependNode = sequence[0]; + + for (int index = 1; index < sequence.Length; index++) + { + Operation operation = sequence[index]; + + successor.Operations.AddAfter(prependNode, operation); + + prependNode = operation; + } + } + else + { + // Split the critical edge. 
+ BasicBlock splitBlock = cfg.SplitEdge(block, successor); + + foreach (Operation operation in sequence) + { + splitBlock.Append(operation); + } + } + } + } + } + + private void ReplaceLocalWithRegister(LiveInterval current) + { + Operand register = GetRegister(current); + + foreach (int usePosition in current.UsePositions()) + { + (_, Operation operation) = GetOperationNode(usePosition); + + for (int index = 0; index < operation.SourcesCount; index++) + { + Operand source = operation.GetSource(index); + + if (source == current.Local) + { + operation.SetSource(index, register); + } + else if (source.Kind == OperandKind.Memory) + { + MemoryOperand memOp = source.GetMemory(); + + if (memOp.BaseAddress == current.Local) + { + memOp.BaseAddress = register; + } + + if (memOp.Index == current.Local) + { + memOp.Index = register; + } + } + } + + for (int index = 0; index < operation.DestinationsCount; index++) + { + Operand dest = operation.GetDestination(index); + + if (dest == current.Local) + { + operation.SetDestination(index, register); + } + } + } + } + + private static Operand GetRegister(LiveInterval interval) + { + Debug.Assert(!interval.IsSpilled, "Spilled intervals are not allowed."); + + return Operand.Factory.Register( + interval.Register.Index, + interval.Register.Type, + interval.Local.Type); + } + + private (IntrusiveList<Operation>, Operation) GetOperationNode(int position) + { + return _operationNodes[position / InstructionGap]; + } + + private void NumberLocals(ControlFlowGraph cfg, int registersCount) + { + _operationNodes = new List<(IntrusiveList<Operation>, Operation)>(); + _intervals = new List<LiveInterval>(); + + for (int index = 0; index < registersCount; index++) + { + _intervals.Add(new LiveInterval(new Register(index, RegisterType.Integer))); + _intervals.Add(new LiveInterval(new Register(index, RegisterType.Vector))); + } + + // The "visited" state is stored in the MSB of the local's value. + const ulong VisitedMask = 1ul << 63; + + bool IsVisited(Operand local) + { + return (local.GetValueUnsafe() & VisitedMask) != 0; + } + + void SetVisited(Operand local) + { + local.GetValueUnsafe() |= VisitedMask; + } + + _operationsCount = 0; + + for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + _operationNodes.Add((block.Operations, node)); + + for (int i = 0; i < node.DestinationsCount; i++) + { + Operand dest = node.GetDestination(i); + + if (dest.Kind == OperandKind.LocalVariable && !IsVisited(dest)) + { + dest.NumberLocal(_intervals.Count); + + _intervals.Add(new LiveInterval(dest)); + + SetVisited(dest); + } + } + } + + _operationsCount += block.Operations.Count * InstructionGap; + + if (block.Operations.Count == 0) + { + // Pretend we have a dummy instruction on the empty block. + _operationNodes.Add((default, default)); + + _operationsCount += InstructionGap; + } + } + + _parentIntervals = _intervals.ToArray(); + } + + private void BuildIntervals(ControlFlowGraph cfg, AllocationContext context) + { + _blockRanges = new LiveRange[cfg.Blocks.Count]; + + int mapSize = _intervals.Count; + + BitMap[] blkLiveGen = new BitMap[cfg.Blocks.Count]; + BitMap[] blkLiveKill = new BitMap[cfg.Blocks.Count]; + + // Compute local live sets. 
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + BitMap liveGen = new BitMap(Allocators.Default, mapSize); + BitMap liveKill = new BitMap(Allocators.Default, mapSize); + + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + for (int i = 0; i < node.SourcesCount; i++) + { + VisitSource(node.GetSource(i)); + } + + for (int i = 0; i < node.DestinationsCount; i++) + { + VisitDestination(node.GetDestination(i)); + } + + void VisitSource(Operand source) + { + if (IsLocalOrRegister(source.Kind)) + { + int id = GetOperandId(source); + + if (!liveKill.IsSet(id)) + { + liveGen.Set(id); + } + } + else if (source.Kind == OperandKind.Memory) + { + MemoryOperand memOp = source.GetMemory(); + + if (memOp.BaseAddress != default) + { + VisitSource(memOp.BaseAddress); + } + + if (memOp.Index != default) + { + VisitSource(memOp.Index); + } + } + } + + void VisitDestination(Operand dest) + { + liveKill.Set(GetOperandId(dest)); + } + } + + blkLiveGen [block.Index] = liveGen; + blkLiveKill[block.Index] = liveKill; + } + + // Compute global live sets. + BitMap[] blkLiveIn = new BitMap[cfg.Blocks.Count]; + BitMap[] blkLiveOut = new BitMap[cfg.Blocks.Count]; + + for (int index = 0; index < cfg.Blocks.Count; index++) + { + blkLiveIn [index] = new BitMap(Allocators.Default, mapSize); + blkLiveOut[index] = new BitMap(Allocators.Default, mapSize); + } + + bool modified; + + do + { + modified = false; + + for (int index = 0; index < cfg.PostOrderBlocks.Length; index++) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + BitMap liveOut = blkLiveOut[block.Index]; + + for (int i = 0; i < block.SuccessorsCount; i++) + { + BasicBlock succ = block.GetSuccessor(i); + + modified |= liveOut.Set(blkLiveIn[succ.Index]); + } + + BitMap liveIn = blkLiveIn[block.Index]; + + liveIn.Set (liveOut); + liveIn.Clear(blkLiveKill[block.Index]); + liveIn.Set (blkLiveGen [block.Index]); + } + } + while (modified); + + _blockLiveIn = blkLiveIn; + + _blockEdges = new HashSet<int>(); + + // Compute lifetime intervals. + int operationPos = _operationsCount; + + for (int index = 0; index < cfg.PostOrderBlocks.Length; index++) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + // We handle empty blocks by pretending they have a dummy instruction, + // because otherwise the block would have the same start and end position, + // and this is not valid. 
+ int instCount = Math.Max(block.Operations.Count, 1); + + int blockStart = operationPos - instCount * InstructionGap; + int blockEnd = operationPos; + + _blockRanges[block.Index] = new LiveRange(blockStart, blockEnd); + + _blockEdges.Add(blockStart); + + BitMap liveOut = blkLiveOut[block.Index]; + + foreach (int id in liveOut) + { + _intervals[id].AddRange(blockStart, blockEnd); + } + + if (block.Operations.Count == 0) + { + operationPos -= InstructionGap; + + continue; + } + + for (Operation node = block.Operations.Last; node != default; node = node.ListPrevious) + { + operationPos -= InstructionGap; + + for (int i = 0; i < node.DestinationsCount; i++) + { + VisitDestination(node.GetDestination(i)); + } + + for (int i = 0; i < node.SourcesCount; i++) + { + VisitSource(node.GetSource(i)); + } + + if (node.Instruction == Instruction.Call) + { + AddIntervalCallerSavedReg(context.Masks.IntCallerSavedRegisters, operationPos, RegisterType.Integer); + AddIntervalCallerSavedReg(context.Masks.VecCallerSavedRegisters, operationPos, RegisterType.Vector); + } + + void VisitSource(Operand source) + { + if (IsLocalOrRegister(source.Kind)) + { + LiveInterval interval = _intervals[GetOperandId(source)]; + + interval.AddRange(blockStart, operationPos + 1); + interval.AddUsePosition(operationPos); + } + else if (source.Kind == OperandKind.Memory) + { + MemoryOperand memOp = source.GetMemory(); + + if (memOp.BaseAddress != default) + { + VisitSource(memOp.BaseAddress); + } + + if (memOp.Index != default) + { + VisitSource(memOp.Index); + } + } + } + + void VisitDestination(Operand dest) + { + LiveInterval interval = _intervals[GetOperandId(dest)]; + + if (interval.IsFixed) + { + interval.IsFixedAndUsed = true; + } + + interval.SetStart(operationPos + 1); + interval.AddUsePosition(operationPos + 1); + } + } + } + + foreach (LiveInterval interval in _parentIntervals) + { + interval.Reset(); + } + } + + private void AddIntervalCallerSavedReg(int mask, int operationPos, RegisterType regType) + { + while (mask != 0) + { + int regIndex = BitOperations.TrailingZeroCount(mask); + + Register callerSavedReg = new Register(regIndex, regType); + + LiveInterval interval = _intervals[GetRegisterId(callerSavedReg)]; + + interval.AddRange(operationPos + 1, operationPos + InstructionGap); + + mask &= ~(1 << regIndex); + } + } + + private static int GetOperandId(Operand operand) + { + if (operand.Kind == OperandKind.LocalVariable) + { + return operand.GetLocalNumber(); + } + else if (operand.Kind == OperandKind.Register) + { + return GetRegisterId(operand.GetRegister()); + } + else + { + throw new ArgumentException($"Invalid operand kind \"{operand.Kind}\"."); + } + } + + private static int GetRegisterId(Register register) + { + return (register.Index << 1) | (register.Type == RegisterType.Vector ? 1 : 0); + } + + private static bool IsLocalOrRegister(OperandKind kind) + { + return kind == OperandKind.LocalVariable || + kind == OperandKind.Register; + } + } +}
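[Editor's note] The pass above is the Wimmer-style linear scan it cites, with interval splitting, fixed intervals and use positions. The underlying sweep is easier to see in the classic Poletto/Sarkar form: visit intervals sorted by start, expire the ones that ended, and when no register is free spill whichever live interval ends last. A compact sketch, independent of the ARMeilleure types:

using System;
using System.Collections.Generic;
using System.Linq;

class Interval
{
    public string Name;
    public int Start, End;
    public int Register = -1; // -1 means the value stays spilled on the stack.
}

static class LinearScanDemo
{
    static void Allocate(List<Interval> intervals, int regCount)
    {
        var free = new Stack<int>(Enumerable.Range(0, regCount).Reverse());
        var active = new List<Interval>();

        foreach (var current in intervals.OrderBy(i => i.Start))
        {
            // Expire active intervals that ended before 'current' starts.
            active.RemoveAll(i =>
            {
                if (i.End <= current.Start)
                {
                    free.Push(i.Register);
                    return true;
                }
                return false;
            });

            if (free.Count > 0)
            {
                current.Register = free.Pop();
                active.Add(current);
            }
            else
            {
                // Spill the live interval that ends last; if that is 'current'
                // itself, it simply keeps Register == -1.
                Interval victim = active.OrderBy(i => i.End).Last();

                if (victim.End > current.End)
                {
                    current.Register = victim.Register;
                    victim.Register = -1;
                    active.Remove(victim);
                    active.Add(current);
                }
            }
        }
    }

    static void Main()
    {
        var ivs = new List<Interval>
        {
            new() { Name = "a", Start = 0, End = 8 },
            new() { Name = "b", Start = 1, End = 3 },
            new() { Name = "c", Start = 2, End = 9 },
        };

        Allocate(ivs, 2);

        foreach (var i in ivs)
        {
            Console.WriteLine($"{i.Name}: {(i.Register < 0 ? "spill" : "r" + i.Register)}");
        }
    }
}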
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs new file mode 100644 index 00000000..d739ad28 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveInterval.cs @@ -0,0 +1,396 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + unsafe readonly struct LiveInterval : IComparable<LiveInterval> + { + public const int NotFound = -1; + + private struct Data + { + public int End; + public int SpillOffset; + + public LiveRange FirstRange; + public LiveRange PrevRange; + public LiveRange CurrRange; + + public LiveInterval Parent; + + public UseList Uses; + public LiveIntervalList Children; + + public Operand Local; + public Register Register; + + public bool IsFixed; + public bool IsFixedAndUsed; + } + + private readonly Data* _data; + + private ref int End => ref _data->End; + private ref LiveRange FirstRange => ref _data->FirstRange; + private ref LiveRange CurrRange => ref _data->CurrRange; + private ref LiveRange PrevRange => ref _data->PrevRange; + private ref LiveInterval Parent => ref _data->Parent; + private ref UseList Uses => ref _data->Uses; + private ref LiveIntervalList Children => ref _data->Children; + + public Operand Local => _data->Local; + public ref Register Register => ref _data->Register; + public ref int SpillOffset => ref _data->SpillOffset; + + public bool IsFixed => _data->IsFixed; + public ref bool IsFixedAndUsed => ref _data->IsFixedAndUsed; + public bool IsEmpty => FirstRange == default; + public bool IsSplit => Children.Count != 0; + public bool IsSpilled => SpillOffset != -1; + + public int UsesCount => Uses.Count; + + public LiveInterval(Operand local = default, LiveInterval parent = default) + { + _data = Allocators.LiveIntervals.Allocate<Data>(); + *_data = default; + + _data->IsFixed = false; + _data->Local = local; + + Parent = parent == default ? this : parent; + Uses = new UseList(); + Children = new LiveIntervalList(); + + FirstRange = default; + CurrRange = default; + PrevRange = default; + + SpillOffset = -1; + } + + public LiveInterval(Register register) : this(local: default, parent: default) + { + _data->IsFixed = true; + + Register = register; + } + + public void Reset() + { + PrevRange = default; + CurrRange = FirstRange; + } + + public void Forward(int position) + { + LiveRange prev = PrevRange; + LiveRange curr = CurrRange; + + while (curr != default && curr.Start < position && !curr.Overlaps(position)) + { + prev = curr; + curr = curr.Next; + } + + PrevRange = prev; + CurrRange = curr; + } + + public int GetStart() + { + Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have a start position."); + + return FirstRange.Start; + } + + public void SetStart(int position) + { + if (FirstRange != default) + { + Debug.Assert(position != FirstRange.End); + + FirstRange.Start = position; + } + else + { + FirstRange = new LiveRange(position, position + 1); + End = position + 1; + } + } + + public int GetEnd() + { + Debug.Assert(!IsEmpty, "Empty LiveInterval cannot have an end position."); + + return End; + } + + public void AddRange(int start, int end) + { + Debug.Assert(start < end, $"Invalid range start position {start}, {end}"); + + if (FirstRange != default) + { + // If the new range ends exactly where the first range start, then coalesce together. 
+ if (end == FirstRange.Start) + { + FirstRange.Start = start; + + return; + } + // If the new range is already contained, then coalesce together. + else if (FirstRange.Overlaps(start, end)) + { + FirstRange.Start = Math.Min(FirstRange.Start, start); + FirstRange.End = Math.Max(FirstRange.End, end); + End = Math.Max(End, end); + + Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next)); + return; + } + } + + FirstRange = new LiveRange(start, end, FirstRange); + End = Math.Max(End, end); + + Debug.Assert(FirstRange.Next == default || !FirstRange.Overlaps(FirstRange.Next)); + } + + public void AddUsePosition(int position) + { + Uses.Add(position); + } + + public bool Overlaps(int position) + { + LiveRange curr = CurrRange; + + while (curr != default && curr.Start <= position) + { + if (curr.Overlaps(position)) + { + return true; + } + + curr = curr.Next; + } + + return false; + } + + public bool Overlaps(LiveInterval other) + { + return GetOverlapPosition(other) != NotFound; + } + + public int GetOverlapPosition(LiveInterval other) + { + LiveRange a = CurrRange; + LiveRange b = other.CurrRange; + + while (a != default) + { + while (b != default && b.Start < a.Start) + { + if (a.Overlaps(b)) + { + return a.Start; + } + + b = b.Next; + } + + if (b == default) + { + break; + } + else if (a.Overlaps(b)) + { + return a.Start; + } + + a = a.Next; + } + + return NotFound; + } + + public ReadOnlySpan<LiveInterval> SplitChildren() + { + return Parent.Children.Span; + } + + public ReadOnlySpan<int> UsePositions() + { + return Uses.Span; + } + + public int FirstUse() + { + return Uses.FirstUse; + } + + public int NextUseAfter(int position) + { + return Uses.NextUse(position); + } + + public LiveInterval Split(int position) + { + LiveInterval result = new(Local, Parent); + result.End = End; + + LiveRange prev = PrevRange; + LiveRange curr = CurrRange; + + while (curr != default && curr.Start < position && !curr.Overlaps(position)) + { + prev = curr; + curr = curr.Next; + } + + if (curr.Start >= position) + { + prev.Next = default; + + result.FirstRange = curr; + + End = prev.End; + } + else + { + result.FirstRange = new LiveRange(position, curr.End, curr.Next); + + curr.End = position; + curr.Next = default; + + End = curr.End; + } + + result.Uses = Uses.Split(position); + + AddSplitChild(result); + + Debug.Assert(!IsEmpty, "Left interval is empty after split."); + Debug.Assert(!result.IsEmpty, "Right interval is empty after split."); + + // Make sure the iterator in the new split is pointing to the start. 
+ result.Reset(); + + return result; + } + + private void AddSplitChild(LiveInterval child) + { + Debug.Assert(!child.IsEmpty, "Trying to insert an empty interval."); + + Parent.Children.Add(child); + } + + public LiveInterval GetSplitChild(int position) + { + if (Overlaps(position)) + { + return this; + } + + foreach (LiveInterval splitChild in SplitChildren()) + { + if (splitChild.Overlaps(position)) + { + return splitChild; + } + else if (splitChild.GetStart() > position) + { + break; + } + } + + return default; + } + + public bool TrySpillWithSiblingOffset() + { + foreach (LiveInterval splitChild in SplitChildren()) + { + if (splitChild.IsSpilled) + { + Spill(splitChild.SpillOffset); + + return true; + } + } + + return false; + } + + public void Spill(int offset) + { + SpillOffset = offset; + } + + public int CompareTo(LiveInterval interval) + { + if (FirstRange == default || interval.FirstRange == default) + { + return 0; + } + + return GetStart().CompareTo(interval.GetStart()); + } + + public bool Equals(LiveInterval interval) + { + return interval._data == _data; + } + + public override bool Equals(object obj) + { + return obj is LiveInterval interval && Equals(interval); + } + + public static bool operator ==(LiveInterval a, LiveInterval b) + { + return a.Equals(b); + } + + public static bool operator !=(LiveInterval a, LiveInterval b) + { + return !a.Equals(b); + } + + public override int GetHashCode() + { + return HashCode.Combine((IntPtr)_data); + } + + public override string ToString() + { + LiveInterval self = this; + + IEnumerable<string> GetRanges() + { + LiveRange curr = self.CurrRange; + + while (curr != default) + { + if (curr == self.CurrRange) + { + yield return "*" + curr; + } + else + { + yield return curr.ToString(); + } + + curr = curr.Next; + } + } + + return string.Join(", ", GetRanges()); + } + } +}
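[Editor's note] Because BuildIntervals walks each block backwards, AddRange above usually either prepends a new range or coalesces it into FirstRange, and every range is half-open. The coalescing rule is easy to get wrong; here is a tiny managed mock of it, without the arena-backed unsafe nodes (all names invented):

using System;
using System.Collections.Generic;
using System.Linq;

static class RangeCoalesceDemo
{
    // Prepends [start, end) to a list kept sorted by Start, merging with the
    // head when the ranges touch or overlap, like LiveInterval.AddRange.
    static void AddRange(LinkedList<(int Start, int End)> ranges, int start, int end)
    {
        if (start >= end)
        {
            throw new ArgumentException("Range must not be empty.");
        }

        if (ranges.First != null)
        {
            (int Start, int End) head = ranges.First.Value;

            if (end == head.Start)
            {
                // New range ends exactly where the head starts: coalesce.
                ranges.First.Value = (start, head.End);
                return;
            }

            if (start < head.End && head.Start < end)
            {
                // Overlap: merge into a single range.
                ranges.First.Value = (Math.Min(start, head.Start), Math.Max(end, head.End));
                return;
            }
        }

        ranges.AddFirst((start, end));
    }

    static void Main()
    {
        var ranges = new LinkedList<(int Start, int End)>();

        AddRange(ranges, 10, 14); // [10, 14)
        AddRange(ranges, 6, 8);   // [6, 8), [10, 14)
        AddRange(ranges, 4, 6);   // Touches the head: [4, 8), [10, 14)
        AddRange(ranges, 2, 5);   // Overlaps the head: [2, 8), [10, 14)

        Console.WriteLine(string.Join(", ", ranges.Select(r => $"[{r.Start}, {r.End})")));
    }
}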
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs new file mode 100644 index 00000000..06b979ea --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveIntervalList.cs @@ -0,0 +1,40 @@ +using System; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + unsafe struct LiveIntervalList + { + private LiveInterval* _items; + private int _count; + private int _capacity; + + public int Count => _count; + public Span<LiveInterval> Span => new(_items, _count); + + public void Add(LiveInterval interval) + { + if (_count + 1 > _capacity) + { + var oldSpan = Span; + + _capacity = Math.Max(4, _capacity * 2); + _items = Allocators.References.Allocate<LiveInterval>((uint)_capacity); + + var newSpan = Span; + + oldSpan.CopyTo(newSpan); + } + + int position = interval.GetStart(); + int i = _count - 1; + + while (i >= 0 && _items[i].GetStart() > position) + { + _items[i + 1] = _items[i--]; + } + + _items[i + 1] = interval; + _count++; + } + } +}
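[Editor's note] The insertion loop above keeps split children sorted by start position, which is what lets GetSplitChild stop scanning early. The same shift-and-insert idiom on a plain int array, purely for clarity:

using System;

static class SortedInsertDemo
{
    // Mirrors LiveIntervalList.Add: shift larger starts one slot to the
    // right, then drop the new value into the gap.
    static int[] Insert(int[] starts, int count, int start)
    {
        if (count + 1 > starts.Length)
        {
            Array.Resize(ref starts, Math.Max(4, starts.Length * 2));
        }

        int i = count - 1;

        while (i >= 0 && starts[i] > start)
        {
            starts[i + 1] = starts[i--];
        }

        starts[i + 1] = start;

        return starts;
    }

    static void Main()
    {
        int[] starts = new int[4];
        int count = 0;

        foreach (int s in new[] { 12, 4, 30, 8 })
        {
            starts = Insert(starts, count++, s);
        }

        Console.WriteLine(string.Join(", ", starts[..count])); // 4, 8, 12, 30
    }
}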
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs new file mode 100644 index 00000000..e38b5190 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/LiveRange.cs @@ -0,0 +1,74 @@ +using System; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + unsafe readonly struct LiveRange : IEquatable<LiveRange> + { + private struct Data + { + public int Start; + public int End; + public LiveRange Next; + } + + private readonly Data* _data; + + public ref int Start => ref _data->Start; + public ref int End => ref _data->End; + public ref LiveRange Next => ref _data->Next; + + public LiveRange(int start, int end, LiveRange next = default) + { + _data = Allocators.LiveRanges.Allocate<Data>(); + + Start = start; + End = end; + Next = next; + } + + public bool Overlaps(int start, int end) + { + return Start < end && start < End; + } + + public bool Overlaps(LiveRange range) + { + return Start < range.End && range.Start < End; + } + + public bool Overlaps(int position) + { + return position >= Start && position < End; + } + + public bool Equals(LiveRange range) + { + return range._data == _data; + } + + public override bool Equals(object obj) + { + return obj is LiveRange range && Equals(range); + } + + public static bool operator ==(LiveRange a, LiveRange b) + { + return a.Equals(b); + } + + public static bool operator !=(LiveRange a, LiveRange b) + { + return !a.Equals(b); + } + + public override int GetHashCode() + { + return HashCode.Combine((IntPtr)_data); + } + + public override string ToString() + { + return $"[{Start}, {End})"; + } + } +}
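[Editor's note] Ranges are half-open, [Start, End), which keeps the overlap tests above one-liners: two ranges intersect iff each starts before the other ends, and a range contains its start but not its end. A quick standalone check of the edge cases:

using System;

static class HalfOpenDemo
{
    static bool Overlaps(int aStart, int aEnd, int bStart, int bEnd)
        => aStart < bEnd && bStart < aEnd;

    static bool Contains(int start, int end, int position)
        => position >= start && position < end;

    static void Main()
    {
        Console.WriteLine(Overlaps(0, 4, 4, 8)); // False: [0,4) and [4,8) only touch.
        Console.WriteLine(Overlaps(0, 5, 4, 8)); // True: they share [4,5).
        Console.WriteLine(Contains(0, 4, 0));    // True: the start is inside.
        Console.WriteLine(Contains(0, 4, 4));    // False: the end is one past the last position.
    }
}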
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs new file mode 100644 index 00000000..bc948f95 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/RegisterMasks.cs @@ -0,0 +1,50 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + readonly struct RegisterMasks + { + public int IntAvailableRegisters { get; } + public int VecAvailableRegisters { get; } + public int IntCallerSavedRegisters { get; } + public int VecCallerSavedRegisters { get; } + public int IntCalleeSavedRegisters { get; } + public int VecCalleeSavedRegisters { get; } + public int RegistersCount { get; } + + public RegisterMasks( + int intAvailableRegisters, + int vecAvailableRegisters, + int intCallerSavedRegisters, + int vecCallerSavedRegisters, + int intCalleeSavedRegisters, + int vecCalleeSavedRegisters, + int registersCount) + { + IntAvailableRegisters = intAvailableRegisters; + VecAvailableRegisters = vecAvailableRegisters; + IntCallerSavedRegisters = intCallerSavedRegisters; + VecCallerSavedRegisters = vecCallerSavedRegisters; + IntCalleeSavedRegisters = intCalleeSavedRegisters; + VecCalleeSavedRegisters = vecCalleeSavedRegisters; + RegistersCount = registersCount; + } + + public int GetAvailableRegisters(RegisterType type) + { + if (type == RegisterType.Integer) + { + return IntAvailableRegisters; + } + else if (type == RegisterType.Vector) + { + return VecAvailableRegisters; + } + else + { + throw new ArgumentException($"Invalid register type \"{type}\"."); + } + } + } +}
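[Editor's note] Register sets are plain int bitmasks throughout the allocators: set algebra is bitwise arithmetic, cardinality is PopCount, and iteration is TrailingZeroCount plus clearing the lowest bit. A short sketch of those idioms; the register numbering is invented:

using System;
using System.Numerics;

static class RegMaskDemo
{
    static void Main()
    {
        int available   = 0b1111_1111; // r0-r7 allocatable.
        int callerSaved = 0b0000_1111; // r0-r3 clobbered by calls.
        int inUse       = 0b0000_0101; // r0 and r2 currently live.

        // Registers that survive a call and are still free.
        int calleeSavedFree = available & ~callerSaved & ~inUse;

        Console.WriteLine(BitOperations.PopCount((uint)calleeSavedFree)); // 4

        // Enumerate the set, lowest register first.
        for (int mask = calleeSavedFree; mask != 0; mask &= mask - 1)
        {
            Console.WriteLine($"r{BitOperations.TrailingZeroCount(mask)}");
        }
    }
}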
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs new file mode 100644 index 00000000..038312fe --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/StackAllocator.cs @@ -0,0 +1,25 @@ +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + class StackAllocator + { + private int _offset; + + public int TotalSize => _offset; + + public int Allocate(OperandType type) + { + return Allocate(type.GetSizeInBytes()); + } + + public int Allocate(int sizeInBytes) + { + int offset = _offset; + + _offset += sizeInBytes; + + return offset; + } + } +}
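[Editor's note] The stack allocator is a pure bump allocator: each spill slot is a monotonically increasing byte offset, and TotalSize ends up as the frame's spill area. A usage sketch (internal types, so in-assembly only; it assumes the usual ARMeilleure operand sizes of 4, 8 and 16 bytes):

using ARMeilleure.CodeGen.RegisterAllocators;
using ARMeilleure.IntermediateRepresentation;
using System;

static class StackAllocDemo
{
    static void Main()
    {
        var stackAlloc = new StackAllocator();

        int slotA = stackAlloc.Allocate(OperandType.I64);  // Offset 0 (8 bytes).
        int slotB = stackAlloc.Allocate(OperandType.I32);  // Offset 8 (4 bytes).
        int slotC = stackAlloc.Allocate(OperandType.V128); // Offset 12 (16 bytes).

        // 28 bytes of spill space in total. Note that Allocate never aligns,
        // so the V128 slot above is only 4-byte aligned.
        Console.WriteLine(stackAlloc.TotalSize);
    }
}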
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs b/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs new file mode 100644 index 00000000..c89f0854 --- /dev/null +++ b/src/ARMeilleure/CodeGen/RegisterAllocators/UseList.cs @@ -0,0 +1,84 @@ +using System; + +namespace ARMeilleure.CodeGen.RegisterAllocators +{ + unsafe struct UseList + { + private int* _items; + private int _capacity; + private int _count; + + public int Count => _count; + public int FirstUse => _count > 0 ? _items[_count - 1] : LiveInterval.NotFound; + public Span<int> Span => new(_items, _count); + + public void Add(int position) + { + if (_count + 1 > _capacity) + { + var oldSpan = Span; + + _capacity = Math.Max(4, _capacity * 2); + _items = Allocators.Default.Allocate<int>((uint)_capacity); + + var newSpan = Span; + + oldSpan.CopyTo(newSpan); + } + + // Use positions are usually inserted in descending order, so inserting in descending order is faster, + // since the number of half exchanges is reduced. + int i = _count - 1; + + while (i >= 0 && _items[i] < position) + { + _items[i + 1] = _items[i--]; + } + + _items[i + 1] = position; + _count++; + } + + public int NextUse(int position) + { + int index = NextUseIndex(position); + + return index != LiveInterval.NotFound ? _items[index] : LiveInterval.NotFound; + } + + public int NextUseIndex(int position) + { + int i = _count - 1; + + if (i == -1 || position > _items[0]) + { + return LiveInterval.NotFound; + } + + while (i >= 0 && _items[i] < position) + { + i--; + } + + return i; + } + + public UseList Split(int position) + { + int index = NextUseIndex(position); + + // Since the list is in descending order, the new split list takes the front of the list and the current + // list takes the back of the list. + UseList result = new(); + result._count = index + 1; + result._capacity = result._count; + result._items = _items; + + _count = _count - result._count; + _capacity = _count; + _items = _items + result._count; + + return result; + } + } +}
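[Editor's note] Use positions are stored in descending order because the intervals are built while walking operations backwards: Add is then usually O(1) at the tail, FirstUse is simply the last element, and Split can hand the front of the backing array to the new list without copying. The ordering trick in miniature, with plain arrays instead of the arena:

using System;

static class UseListDemo
{
    // items[0..count) is kept sorted descending, as in UseList.Add.
    static void Add(int[] items, ref int count, int position)
    {
        int i = count - 1;

        while (i >= 0 && items[i] < position)
        {
            items[i + 1] = items[i--];
        }

        items[i + 1] = position;
        count++;
    }

    // Smallest use at or after 'position': scan from the small end, as NextUse does.
    static int NextUse(int[] items, int count, int position)
    {
        int i = count - 1;

        if (i == -1 || position > items[0])
        {
            return -1; // No use at or after 'position'.
        }

        while (i >= 0 && items[i] < position)
        {
            i--;
        }

        return items[i];
    }

    static void Main()
    {
        int[] items = new int[8];
        int count = 0;

        // Positions arrive in descending order during the backwards IR walk.
        foreach (int pos in new[] { 40, 25, 10 })
        {
            Add(items, ref count, pos);
        }

        Console.WriteLine(NextUse(items, count, 12)); // 25
        Console.WriteLine(NextUse(items, count, 41)); // -1
    }
}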
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs new file mode 100644 index 00000000..3d0bc21d --- /dev/null +++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindInfo.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.CodeGen.Unwinding +{ + struct UnwindInfo + { + public const int Stride = 4; // Bytes. + + public UnwindPushEntry[] PushEntries { get; } + public int PrologSize { get; } + + public UnwindInfo(UnwindPushEntry[] pushEntries, int prologSize) + { + PushEntries = pushEntries; + PrologSize = prologSize; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs new file mode 100644 index 00000000..4a8288a2 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPseudoOp.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.CodeGen.Unwinding +{ + enum UnwindPseudoOp + { + PushReg = 0, + SetFrame = 1, + AllocStack = 2, + SaveReg = 3, + SaveXmm128 = 4 + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs new file mode 100644 index 00000000..fd8ea402 --- /dev/null +++ b/src/ARMeilleure/CodeGen/Unwinding/UnwindPushEntry.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.CodeGen.Unwinding +{ + struct UnwindPushEntry + { + public const int Stride = 16; // Bytes. + + public UnwindPseudoOp PseudoOp { get; } + public int PrologOffset { get; } + public int RegIndex { get; } + public int StackOffsetOrAllocSize { get; } + + public UnwindPushEntry(UnwindPseudoOp pseudoOp, int prologOffset, int regIndex = -1, int stackOffsetOrAllocSize = -1) + { + PseudoOp = pseudoOp; + PrologOffset = prologOffset; + RegIndex = regIndex; + StackOffsetOrAllocSize = stackOffsetOrAllocSize; + } + } +}
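[Editor's note] Together, UnwindInfo, UnwindPseudoOp and UnwindPushEntry describe a function prologue to the unwinder as a list of pseudo-ops at given code offsets. A sketch of how a prologue that pushes one register and then reserves stack space might be recorded; the offsets, sizes and register index here are invented for the example:

using ARMeilleure.CodeGen.Unwinding;

static class UnwindDemo
{
    static UnwindInfo Describe()
    {
        // Hypothetical 8-byte prologue: a register push completing at byte 1,
        // then a 0x40-byte stack reservation completing at byte 8.
        var pushEntries = new[]
        {
            new UnwindPushEntry(UnwindPseudoOp.PushReg, prologOffset: 1, regIndex: 5),
            new UnwindPushEntry(UnwindPseudoOp.AllocStack, prologOffset: 8, stackOffsetOrAllocSize: 0x40),
        };

        return new UnwindInfo(pushEntries, prologSize: 8);
    }
}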
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/Assembler.cs b/src/ARMeilleure/CodeGen/X86/Assembler.cs new file mode 100644 index 00000000..67736a31 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/Assembler.cs @@ -0,0 +1,1559 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.IntermediateRepresentation; +using Ryujinx.Common.Memory; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.Runtime.InteropServices; + +namespace ARMeilleure.CodeGen.X86 +{ + partial class Assembler + { + private const int ReservedBytesForJump = 1; + + private const int OpModRMBits = 24; + + private const byte RexPrefix = 0x40; + private const byte RexWPrefix = 0x48; + private const byte LockPrefix = 0xf0; + + private const int MaxRegNumber = 15; + + private struct Jump + { + public bool IsConditional { get; } + public X86Condition Condition { get; } + public Operand JumpLabel { get; } + public long? JumpTarget { get; set; } + public long JumpPosition { get; } + public long Offset { get; set; } + public int InstSize { get; set; } + + public Jump(Operand jumpLabel, long jumpPosition) + { + IsConditional = false; + Condition = 0; + JumpLabel = jumpLabel; + JumpTarget = null; + JumpPosition = jumpPosition; + + Offset = 0; + InstSize = 0; + } + + public Jump(X86Condition condition, Operand jumpLabel, long jumpPosition) + { + IsConditional = true; + Condition = condition; + JumpLabel = jumpLabel; + JumpTarget = null; + JumpPosition = jumpPosition; + + Offset = 0; + InstSize = 0; + } + } + + private struct Reloc + { + public int JumpIndex { get; set; } + public int Position { get; set; } + public Symbol Symbol { get; set; } + } + + private readonly List<Jump> _jumps; + private readonly List<Reloc> _relocs; + private readonly Dictionary<Operand, long> _labels; + private readonly Stream _stream; + + public bool HasRelocs => _relocs != null; + + public Assembler(Stream stream, bool relocatable) + { + _stream = stream; + _labels = new Dictionary<Operand, long>(); + _jumps = new List<Jump>(); + + _relocs = relocatable ? 
new List<Reloc>() : null; + } + + public void MarkLabel(Operand label) + { + _labels.Add(label, _stream.Position); + } + + public void Add(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Add); + } + + public void Addsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Addsd); + } + + public void Addss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Addss); + } + + public void And(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.And); + } + + public void Bsr(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Bsr); + } + + public void Bswap(Operand dest) + { + WriteInstruction(dest, default, dest.Type, X86Instruction.Bswap); + } + + public void Call(Operand dest) + { + WriteInstruction(dest, default, OperandType.None, X86Instruction.Call); + } + + public void Cdq() + { + WriteByte(0x99); + } + + public void Cmovcc(Operand dest, Operand source, OperandType type, X86Condition condition) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Cmovcc]; + + WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM | (int)condition, rrm: true); + } + + public void Cmp(Operand src1, Operand src2, OperandType type) + { + WriteInstruction(src1, src2, type, X86Instruction.Cmp); + } + + public void Cqo() + { + WriteByte(0x48); + WriteByte(0x99); + } + + public void Cmpxchg(Operand memOp, Operand src) + { + Debug.Assert(memOp.Kind == OperandKind.Memory); + + WriteByte(LockPrefix); + + WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg); + } + + public void Cmpxchg16(Operand memOp, Operand src) + { + Debug.Assert(memOp.Kind == OperandKind.Memory); + + WriteByte(LockPrefix); + WriteByte(0x66); + + WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg); + } + + public void Cmpxchg16b(Operand memOp) + { + Debug.Assert(memOp.Kind == OperandKind.Memory); + + WriteByte(LockPrefix); + + WriteInstruction(memOp, default, OperandType.None, X86Instruction.Cmpxchg16b); + } + + public void Cmpxchg8(Operand memOp, Operand src) + { + Debug.Assert(memOp.Kind == OperandKind.Memory); + + WriteByte(LockPrefix); + + WriteInstruction(memOp, src, src.Type, X86Instruction.Cmpxchg8); + } + + public void Comisd(Operand src1, Operand src2) + { + WriteInstruction(src1, default, src2, X86Instruction.Comisd); + } + + public void Comiss(Operand src1, Operand src2) + { + WriteInstruction(src1, default, src2, X86Instruction.Comiss); + } + + public void Cvtsd2ss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsd2ss); + } + + public void Cvtsi2sd(Operand dest, Operand src1, Operand src2, OperandType type) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2sd, type); + } + + public void Cvtsi2ss(Operand dest, Operand src1, Operand src2, OperandType type) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtsi2ss, type); + } + + public void Cvtss2sd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Cvtss2sd); + } + + public void Div(Operand source) + { + WriteInstruction(default, source, source.Type, X86Instruction.Div); + } + + public void Divsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Divsd); + } + + public void Divss(Operand dest, Operand src1, Operand 
src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Divss); + } + + public void Idiv(Operand source) + { + WriteInstruction(default, source, source.Type, X86Instruction.Idiv); + } + + public void Imul(Operand source) + { + WriteInstruction(default, source, source.Type, X86Instruction.Imul128); + } + + public void Imul(Operand dest, Operand source, OperandType type) + { + if (source.Kind != OperandKind.Register) + { + throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\"."); + } + + WriteInstruction(dest, source, type, X86Instruction.Imul); + } + + public void Imul(Operand dest, Operand src1, Operand src2, OperandType type) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Imul]; + + if (src2.Kind != OperandKind.Constant) + { + throw new ArgumentException($"Invalid source 2 operand kind \"{src2.Kind}\"."); + } + + if (IsImm8(src2.Value, src2.Type) && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm8, rrm: true); + + WriteByte(src2.AsByte()); + } + else if (IsImm32(src2.Value, src2.Type) && info.OpRMImm32 != BadOp) + { + WriteOpCode(dest, default, src1, type, info.Flags, info.OpRMImm32, rrm: true); + + WriteInt32(src2.AsInt32()); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{src2.Value:X}."); + } + } + + public void Insertps(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Insertps); + + WriteByte(imm); + } + + public void Jcc(X86Condition condition, Operand dest) + { + if (dest.Kind == OperandKind.Label) + { + _jumps.Add(new Jump(condition, dest, _stream.Position)); + + // ReservedBytesForJump + WriteByte(0); + } + else + { + throw new ArgumentException("Destination operand must be of kind Label", nameof(dest)); + } + } + + public void Jcc(X86Condition condition, long offset) + { + if (ConstFitsOnS8(offset)) + { + WriteByte((byte)(0x70 | (int)condition)); + + WriteByte((byte)offset); + } + else if (ConstFitsOnS32(offset)) + { + WriteByte(0x0f); + WriteByte((byte)(0x80 | (int)condition)); + + WriteInt32((int)offset); + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public void Jmp(long offset) + { + if (ConstFitsOnS8(offset)) + { + WriteByte(0xeb); + + WriteByte((byte)offset); + } + else if (ConstFitsOnS32(offset)) + { + WriteByte(0xe9); + + WriteInt32((int)offset); + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + public void Jmp(Operand dest) + { + if (dest.Kind == OperandKind.Label) + { + _jumps.Add(new Jump(dest, _stream.Position)); + + // ReservedBytesForJump + WriteByte(0); + } + else + { + WriteInstruction(dest, default, OperandType.None, X86Instruction.Jmp); + } + } + + public void Ldmxcsr(Operand dest) + { + WriteInstruction(dest, default, OperandType.I32, X86Instruction.Ldmxcsr); + } + + public void Lea(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Lea); + } + + public void LockOr(Operand dest, Operand source, OperandType type) + { + WriteByte(LockPrefix); + WriteInstruction(dest, source, type, X86Instruction.Or); + } + + public void Mov(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Mov); + } + + public void Mov16(Operand dest, Operand source) + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Mov16); + } + + public void Mov8(Operand dest, Operand source) + { + WriteInstruction(dest, 
source, OperandType.None, X86Instruction.Mov8); + } + + public void Movd(Operand dest, Operand source) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd]; + + if (source.Type.IsInteger() || source.Kind == OperandKind.Memory) + { + WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRRM, rrm: true); + } + else + { + WriteOpCode(dest, default, source, OperandType.None, info.Flags, info.OpRMR); + } + } + + public void Movdqu(Operand dest, Operand source) + { + WriteInstruction(dest, default, source, X86Instruction.Movdqu); + } + + public void Movhlps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movhlps); + } + + public void Movlhps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movlhps); + } + + public void Movq(Operand dest, Operand source) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Movd]; + + InstructionFlags flags = info.Flags | InstructionFlags.RexW; + + if (source.Type.IsInteger() || source.Kind == OperandKind.Memory) + { + WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRRM, rrm: true); + } + else if (dest.Type.IsInteger() || dest.Kind == OperandKind.Memory) + { + WriteOpCode(dest, default, source, OperandType.None, flags, info.OpRMR); + } + else + { + WriteInstruction(dest, source, OperandType.None, X86Instruction.Movq); + } + } + + public void Movsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movsd); + } + + public void Movss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Movss); + } + + public void Movsx16(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx16); + } + + public void Movsx32(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx32); + } + + public void Movsx8(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movsx8); + } + + public void Movzx16(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movzx16); + } + + public void Movzx8(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Movzx8); + } + + public void Mul(Operand source) + { + WriteInstruction(default, source, source.Type, X86Instruction.Mul128); + } + + public void Mulsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Mulsd); + } + + public void Mulss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Mulss); + } + + public void Neg(Operand dest) + { + WriteInstruction(dest, default, dest.Type, X86Instruction.Neg); + } + + public void Not(Operand dest) + { + WriteInstruction(dest, default, dest.Type, X86Instruction.Not); + } + + public void Or(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Or); + } + + public void Pclmulqdq(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, default, source, X86Instruction.Pclmulqdq); + + WriteByte(imm); + } + + public void Pcmpeqw(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pcmpeqw); + } + + public void Pextrb(Operand dest, Operand source, byte imm) + { + 
WriteInstruction(dest, default, source, X86Instruction.Pextrb); + + WriteByte(imm); + } + + public void Pextrd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, default, source, X86Instruction.Pextrd); + + WriteByte(imm); + } + + public void Pextrq(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, default, source, X86Instruction.Pextrq); + + WriteByte(imm); + } + + public void Pextrw(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, default, source, X86Instruction.Pextrw); + + WriteByte(imm); + } + + public void Pinsrb(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrb); + + WriteByte(imm); + } + + public void Pinsrd(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrd); + + WriteByte(imm); + } + + public void Pinsrq(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrq); + + WriteByte(imm); + } + + public void Pinsrw(Operand dest, Operand src1, Operand src2, byte imm) + { + WriteInstruction(dest, src1, src2, X86Instruction.Pinsrw); + + WriteByte(imm); + } + + public void Pop(Operand dest) + { + if (dest.Kind == OperandKind.Register) + { + WriteCompactInst(dest, 0x58); + } + else + { + WriteInstruction(dest, default, dest.Type, X86Instruction.Pop); + } + } + + public void Popcnt(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Popcnt); + } + + public void Pshufd(Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, default, source, X86Instruction.Pshufd); + + WriteByte(imm); + } + + public void Push(Operand source) + { + if (source.Kind == OperandKind.Register) + { + WriteCompactInst(source, 0x50); + } + else + { + WriteInstruction(default, source, source.Type, X86Instruction.Push); + } + } + + public void Return() + { + WriteByte(0xc3); + } + + public void Ror(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Ror); + } + + public void Sar(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Sar); + } + + public void Shl(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Shl); + } + + public void Shr(Operand dest, Operand source, OperandType type) + { + WriteShiftInst(dest, source, type, X86Instruction.Shr); + } + + public void Setcc(Operand dest, X86Condition condition) + { + ref readonly InstructionInfo info = ref _instTable[(int)X86Instruction.Setcc]; + + WriteOpCode(dest, default, default, OperandType.None, info.Flags, info.OpRRM | (int)condition); + } + + public void Stmxcsr(Operand dest) + { + WriteInstruction(dest, default, OperandType.I32, X86Instruction.Stmxcsr); + } + + public void Sub(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, X86Instruction.Sub); + } + + public void Subsd(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Subsd); + } + + public void Subss(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Subss); + } + + public void Test(Operand src1, Operand src2, OperandType type) + { + WriteInstruction(src1, src2, type, X86Instruction.Test); + } + + public void Xor(Operand dest, Operand source, OperandType type) + { + WriteInstruction(dest, source, type, 
X86Instruction.Xor); + } + + public void Xorps(Operand dest, Operand src1, Operand src2) + { + WriteInstruction(dest, src1, src2, X86Instruction.Xorps); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand source, + OperandType type = OperandType.None) + { + WriteInstruction(dest, default, source, inst, type); + } + + public void WriteInstruction(X86Instruction inst, Operand dest, Operand src1, Operand src2) + { + if (src2.Kind == OperandKind.Constant) + { + WriteInstruction(src1, dest, src2, inst); + } + else + { + WriteInstruction(dest, src1, src2, inst); + } + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand src1, + Operand src2, + OperandType type) + { + WriteInstruction(dest, src1, src2, inst, type); + } + + public void WriteInstruction(X86Instruction inst, Operand dest, Operand source, byte imm) + { + WriteInstruction(dest, default, source, inst); + + WriteByte(imm); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand src1, + Operand src2, + Operand src3) + { + // 3+ operands can only be encoded with the VEX encoding scheme. + Debug.Assert(HardwareCapabilities.SupportsVexEncoding); + + WriteInstruction(dest, src1, src2, inst); + + WriteByte((byte)(src3.AsByte() << 4)); + } + + public void WriteInstruction( + X86Instruction inst, + Operand dest, + Operand src1, + Operand src2, + byte imm) + { + WriteInstruction(dest, src1, src2, inst); + + WriteByte(imm); + } + + private void WriteShiftInst(Operand dest, Operand source, OperandType type, X86Instruction inst) + { + if (source.Kind == OperandKind.Register) + { + X86Register shiftReg = (X86Register)source.GetRegister().Index; + + Debug.Assert(shiftReg == X86Register.Rcx, $"Invalid shift register \"{shiftReg}\"."); + + source = default; + } + else if (source.Kind == OperandKind.Constant) + { + source = Operand.Factory.Const((int)source.Value & (dest.Type == OperandType.I32 ? 
0x1f : 0x3f)); + } + + WriteInstruction(dest, source, type, inst); + } + + private void WriteInstruction(Operand dest, Operand source, OperandType type, X86Instruction inst) + { + ref readonly InstructionInfo info = ref _instTable[(int)inst]; + + if (source != default) + { + if (source.Kind == OperandKind.Constant) + { + ulong imm = source.Value; + + if (inst == X86Instruction.Mov8) + { + WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else if (inst == X86Instruction.Mov16) + { + WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32); + + WriteInt16((short)imm); + } + else if (IsImm8(imm, type) && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else if (!source.Relocatable && IsImm32(imm, type) && info.OpRMImm32 != BadOp) + { + WriteOpCode(dest, default, default, type, info.Flags, info.OpRMImm32); + + WriteInt32((int)imm); + } + else if (dest != default && dest.Kind == OperandKind.Register && info.OpRImm64 != BadOp) + { + int rexPrefix = GetRexPrefix(dest, source, type, rrm: false); + + if (rexPrefix != 0) + { + WriteByte((byte)rexPrefix); + } + + WriteByte((byte)(info.OpRImm64 + (dest.GetRegister().Index & 0b111))); + + if (HasRelocs && source.Relocatable) + { + _relocs.Add(new Reloc + { + JumpIndex = _jumps.Count - 1, + Position = (int)_stream.Position, + Symbol = source.Symbol + }); + } + + WriteUInt64(imm); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{imm:X}."); + } + } + else if (source.Kind == OperandKind.Register && info.OpRMR != BadOp) + { + WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR); + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true); + } + else + { + throw new ArgumentException($"Invalid source operand kind \"{source.Kind}\"."); + } + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, default, source, type, info.Flags, info.OpRRM, rrm: true); + } + else if (info.OpRMR != BadOp) + { + WriteOpCode(dest, default, source, type, info.Flags, info.OpRMR); + } + else + { + throw new ArgumentNullException(nameof(source)); + } + } + + private void WriteInstruction( + Operand dest, + Operand src1, + Operand src2, + X86Instruction inst, + OperandType type = OperandType.None) + { + ref readonly InstructionInfo info = ref _instTable[(int)inst]; + + if (src2 != default) + { + if (src2.Kind == OperandKind.Constant) + { + ulong imm = src2.Value; + + if ((byte)imm == imm && info.OpRMImm8 != BadOp) + { + WriteOpCode(dest, src1, default, type, info.Flags, info.OpRMImm8); + + WriteByte((byte)imm); + } + else + { + throw new ArgumentException($"Failed to encode constant 0x{imm:X}."); + } + } + else if (src2.Kind == OperandKind.Register && info.OpRMR != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR); + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true); + } + else + { + throw new ArgumentException($"Invalid source operand kind \"{src2.Kind}\"."); + } + } + else if (info.OpRRM != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRRM, rrm: true); + } + else if (info.OpRMR != BadOp) + { + WriteOpCode(dest, src1, src2, type, info.Flags, info.OpRMR); + } + else + { + throw new ArgumentNullException(nameof(src2)); + } + } + + private void WriteOpCode( + Operand dest, + Operand src1, + Operand src2, + OperandType type, + 
InstructionFlags flags, + int opCode, + bool rrm = false) + { + int rexPrefix = GetRexPrefix(dest, src2, type, rrm); + + if ((flags & InstructionFlags.RexW) != 0) + { + rexPrefix |= RexWPrefix; + } + + int modRM = (opCode >> OpModRMBits) << 3; + + MemoryOperand memOp = default; + bool hasMemOp = false; + + if (dest != default) + { + if (dest.Kind == OperandKind.Register) + { + int regIndex = dest.GetRegister().Index; + + modRM |= (regIndex & 0b111) << (rrm ? 3 : 0); + + if ((flags & InstructionFlags.Reg8Dest) != 0 && regIndex >= 4) + { + rexPrefix |= RexPrefix; + } + } + else if (dest.Kind == OperandKind.Memory) + { + memOp = dest.GetMemory(); + hasMemOp = true; + } + else + { + throw new ArgumentException("Invalid destination operand kind \"" + dest.Kind + "\"."); + } + } + + if (src2 != default) + { + if (src2.Kind == OperandKind.Register) + { + int regIndex = src2.GetRegister().Index; + + modRM |= (regIndex & 0b111) << (rrm ? 0 : 3); + + if ((flags & InstructionFlags.Reg8Src) != 0 && regIndex >= 4) + { + rexPrefix |= RexPrefix; + } + } + else if (src2.Kind == OperandKind.Memory && !hasMemOp) + { + memOp = src2.GetMemory(); + hasMemOp = true; + } + else + { + throw new ArgumentException("Invalid source operand kind \"" + src2.Kind + "\"."); + } + } + + bool needsSibByte = false; + bool needsDisplacement = false; + + int sib = 0; + + if (hasMemOp) + { + // Either source or destination is a memory operand. + Register baseReg = memOp.BaseAddress.GetRegister(); + + X86Register baseRegLow = (X86Register)(baseReg.Index & 0b111); + + needsSibByte = memOp.Index != default || baseRegLow == X86Register.Rsp; + needsDisplacement = memOp.Displacement != 0 || baseRegLow == X86Register.Rbp; + + if (needsDisplacement) + { + if (ConstFitsOnS8(memOp.Displacement)) + { + modRM |= 0x40; + } + else /* if (ConstFitsOnS32(memOp.Displacement)) */ + { + modRM |= 0x80; + } + } + + if (baseReg.Index >= 8) + { + Debug.Assert((uint)baseReg.Index <= MaxRegNumber); + + rexPrefix |= RexPrefix | (baseReg.Index >> 3); + } + + if (needsSibByte) + { + sib = (int)baseRegLow; + + if (memOp.Index != default) + { + int indexReg = memOp.Index.GetRegister().Index; + + Debug.Assert(indexReg != (int)X86Register.Rsp, "Using RSP as index register on the memory operand is not allowed."); + + if (indexReg >= 8) + { + Debug.Assert((uint)indexReg <= MaxRegNumber); + + rexPrefix |= RexPrefix | (indexReg >> 3) << 1; + } + + sib |= (indexReg & 0b111) << 3; + } + else + { + sib |= 0b100 << 3; + } + + sib |= (int)memOp.Scale << 6; + + modRM |= 0b100; + } + else + { + modRM |= (int)baseRegLow; + } + } + else + { + // Source and destination are registers. + modRM |= 0xc0; + } + + Debug.Assert(opCode != BadOp, "Invalid opcode value."); + + if ((flags & InstructionFlags.Evex) != 0 && HardwareCapabilities.SupportsEvexEncoding) + { + WriteEvexInst(dest, src1, src2, type, flags, opCode); + + opCode &= 0xff; + } + else if ((flags & InstructionFlags.Vex) != 0 && HardwareCapabilities.SupportsVexEncoding) + { + // In a vex encoding, only one prefix can be active at a time. The active prefix is encoded in the second byte using two bits. 
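+                // Worked example (editor's illustration, not part of the original patch):
+                // vaddsd xmm0, xmm1, xmm2 uses prefix F2 (pp = 0b11), opcode map 0F,
+                // W = 0 and all register indices below 8, so the two-byte form applies:
+                // 0xC5, then (inverted R = 1) << 7 | ((~1 & 0xF) << 3) | pp = 0xF3,
+                // then opcode byte 0x58 and ModRM 0xC2, giving C5 F3 58 C2 overall.
+                // Setting any of W/X/B, or using map 0F38/0F3A, forces the 0xC4 form below.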
+ + int vexByte2 = (flags & InstructionFlags.PrefixMask) switch + { + InstructionFlags.Prefix66 => 1, + InstructionFlags.PrefixF3 => 2, + InstructionFlags.PrefixF2 => 3, + _ => 0 + }; + + if (src1 != default) + { + vexByte2 |= (src1.GetRegister().Index ^ 0xf) << 3; + } + else + { + vexByte2 |= 0b1111 << 3; + } + + ushort opCodeHigh = (ushort)(opCode >> 8); + + if ((rexPrefix & 0b1011) == 0 && opCodeHigh == 0xf) + { + // Two-byte form. + WriteByte(0xc5); + + vexByte2 |= (~rexPrefix & 4) << 5; + + WriteByte((byte)vexByte2); + } + else + { + // Three-byte form. + WriteByte(0xc4); + + int vexByte1 = (~rexPrefix & 7) << 5; + + switch (opCodeHigh) + { + case 0xf: vexByte1 |= 1; break; + case 0xf38: vexByte1 |= 2; break; + case 0xf3a: vexByte1 |= 3; break; + + default: Debug.Assert(false, $"Failed to VEX encode opcode 0x{opCode:X}."); break; + } + + vexByte2 |= (rexPrefix & 8) << 4; + + WriteByte((byte)vexByte1); + WriteByte((byte)vexByte2); + } + + opCode &= 0xff; + } + else + { + if (flags.HasFlag(InstructionFlags.Prefix66)) + { + WriteByte(0x66); + } + + if (flags.HasFlag(InstructionFlags.PrefixF2)) + { + WriteByte(0xf2); + } + + if (flags.HasFlag(InstructionFlags.PrefixF3)) + { + WriteByte(0xf3); + } + + if (rexPrefix != 0) + { + WriteByte((byte)rexPrefix); + } + } + + if (dest != default && (flags & InstructionFlags.RegOnly) != 0) + { + opCode += dest.GetRegister().Index & 7; + } + + if ((opCode & 0xff0000) != 0) + { + WriteByte((byte)(opCode >> 16)); + } + + if ((opCode & 0xff00) != 0) + { + WriteByte((byte)(opCode >> 8)); + } + + WriteByte((byte)opCode); + + if ((flags & InstructionFlags.RegOnly) == 0) + { + WriteByte((byte)modRM); + + if (needsSibByte) + { + WriteByte((byte)sib); + } + + if (needsDisplacement) + { + if (ConstFitsOnS8(memOp.Displacement)) + { + WriteByte((byte)memOp.Displacement); + } + else /* if (ConstFitsOnS32(memOp.Displacement)) */ + { + WriteInt32(memOp.Displacement); + } + } + } + } + + private void WriteEvexInst( + Operand dest, + Operand src1, + Operand src2, + OperandType type, + InstructionFlags flags, + int opCode, + bool broadcast = false, + int registerWidth = 128, + int maskRegisterIdx = 0, + bool zeroElements = false) + { + int op1Idx = dest.GetRegister().Index; + int op2Idx = src1.GetRegister().Index; + int op3Idx = src2.GetRegister().Index; + + WriteByte(0x62); + + // P0 + // Extend operand 1 register + bool r = (op1Idx & 8) == 0; + // Extend operand 3 register + bool x = (op3Idx & 16) == 0; + // Extend operand 3 register + bool b = (op3Idx & 8) == 0; + // Extend operand 1 register + bool rp = (op1Idx & 16) == 0; + // Escape code index + byte mm = 0b00; + + switch ((ushort)(opCode >> 8)) + { + case 0xf00: mm = 0b01; break; + case 0xf38: mm = 0b10; break; + case 0xf3a: mm = 0b11; break; + + default: Debug.Fail($"Failed to EVEX encode opcode 0x{opCode:X}."); break; + } + + WriteByte( + (byte)( + (r ? 0x80 : 0) | + (x ? 0x40 : 0) | + (b ? 0x20 : 0) | + (rp ? 0x10 : 0) | + mm)); + + // P1 + // Specify 64-bit lane mode + bool w = Is64Bits(type); + // Operand 2 register index + byte vvvv = (byte)(~op2Idx & 0b1111); + // Opcode prefix + byte pp = (flags & InstructionFlags.PrefixMask) switch + { + InstructionFlags.Prefix66 => 0b01, + InstructionFlags.PrefixF3 => 0b10, + InstructionFlags.PrefixF2 => 0b11, + _ => 0 + }; + WriteByte( + (byte)( + (w ? 
0x80 : 0) | + (vvvv << 3) | + 0b100 | + pp)); + + // P2 + // Mask register determines what elements to zero, rather than what elements to merge + bool z = zeroElements; + // Specifies register-width + byte ll = 0b00; + switch (registerWidth) + { + case 128: ll = 0b00; break; + case 256: ll = 0b01; break; + case 512: ll = 0b10; break; + + default: Debug.Fail($"Invalid EVEX vector register width {registerWidth}."); break; + } + // Embedded broadcast in the case of a memory operand + bool bcast = broadcast; + // Extend operand 2 register + bool vp = (op2Idx & 16) == 0; + // Mask register index + Debug.Assert(maskRegisterIdx < 8, $"Invalid mask register index {maskRegisterIdx}."); + byte aaa = (byte)(maskRegisterIdx & 0b111); + + WriteByte( + (byte)( + (z ? 0x80 : 0) | + (ll << 5) | + (bcast ? 0x10 : 0) | + (vp ? 8 : 0) | + aaa)); + } + + private void WriteCompactInst(Operand operand, int opCode) + { + int regIndex = operand.GetRegister().Index; + + if (regIndex >= 8) + { + WriteByte(0x41); + } + + WriteByte((byte)(opCode + (regIndex & 0b111))); + } + + private static int GetRexPrefix(Operand dest, Operand source, OperandType type, bool rrm) + { + int rexPrefix = 0; + + if (Is64Bits(type)) + { + rexPrefix = RexWPrefix; + } + + void SetRegisterHighBit(Register reg, int bit) + { + if (reg.Index >= 8) + { + rexPrefix |= RexPrefix | (reg.Index >> 3) << bit; + } + } + + if (dest != default && dest.Kind == OperandKind.Register) + { + SetRegisterHighBit(dest.GetRegister(), rrm ? 2 : 0); + } + + if (source != default && source.Kind == OperandKind.Register) + { + SetRegisterHighBit(source.GetRegister(), rrm ? 0 : 2); + } + + return rexPrefix; + } + + public (byte[], RelocInfo) GetCode() + { + var jumps = CollectionsMarshal.AsSpan(_jumps); + var relocs = CollectionsMarshal.AsSpan(_relocs); + + // Write jump relative offsets. + bool modified; + + do + { + modified = false; + + for (int i = 0; i < jumps.Length; i++) + { + ref Jump jump = ref jumps[i]; + + // If jump target not resolved yet, resolve it. + if (jump.JumpTarget == null) + { + jump.JumpTarget = _labels[jump.JumpLabel]; + } + + long jumpTarget = jump.JumpTarget.Value; + long offset = jumpTarget - jump.JumpPosition; + + if (offset < 0) + { + for (int j = i - 1; j >= 0; j--) + { + ref Jump jump2 = ref jumps[j]; + + if (jump2.JumpPosition < jumpTarget) + { + break; + } + + offset -= jump2.InstSize - ReservedBytesForJump; + } + } + else + { + for (int j = i + 1; j < jumps.Length; j++) + { + ref Jump jump2 = ref jumps[j]; + + if (jump2.JumpPosition >= jumpTarget) + { + break; + } + + offset += jump2.InstSize - ReservedBytesForJump; + } + + offset -= ReservedBytesForJump; + } + + if (jump.IsConditional) + { + jump.InstSize = GetJccLength(offset); + } + else + { + jump.InstSize = GetJmpLength(offset); + } + + // The jump is relative to the next instruction, not the current one. + // Since we didn't know the next instruction address when calculating + // the offset (as the size of the current jump instruction was not known), + // we now need to compensate the offset with the jump instruction size. + // It's also worth noting that: + // - This is only needed for backward jumps. + // - The GetJmpLength and GetJccLength also compensates the offset + // internally when computing the jump instruction size. + if (offset < 0) + { + offset -= jump.InstSize; + } + + if (jump.Offset != offset) + { + jump.Offset = offset; + + modified = true; + } + } + } + while (modified); + + // Write the code, ignoring the dummy bytes after jumps, into a new stream. 
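+            // Concrete check of the offset loop above (editor's illustration, not part
+            // of the original patch): for a conditional jump whose label sits 100 bytes
+            // before the placeholder, with no other jumps in between, offset starts at
+            // -100; GetJccLength(-100) tests -100 - 2 = -102, which fits in a signed
+            // byte, so InstSize = 2 and the encoded displacement becomes -100 - 2 = -102,
+            // measured from the end of the 2-byte short Jcc.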
+ _stream.Seek(0, SeekOrigin.Begin); + + using var codeStream = MemoryStreamManager.Shared.GetStream(); + var assembler = new Assembler(codeStream, HasRelocs); + + bool hasRelocs = HasRelocs; + int relocIndex = 0; + int relocOffset = 0; + var relocEntries = hasRelocs + ? new RelocEntry[relocs.Length] + : Array.Empty<RelocEntry>(); + + for (int i = 0; i < jumps.Length; i++) + { + ref Jump jump = ref jumps[i]; + + // If has relocations, calculate their new positions compensating for jumps. + if (hasRelocs) + { + relocOffset += jump.InstSize - ReservedBytesForJump; + + for (; relocIndex < relocEntries.Length; relocIndex++) + { + ref Reloc reloc = ref relocs[relocIndex]; + + if (reloc.JumpIndex > i) + { + break; + } + + relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol); + } + } + + Span<byte> buffer = new byte[jump.JumpPosition - _stream.Position]; + + _stream.Read(buffer); + _stream.Seek(ReservedBytesForJump, SeekOrigin.Current); + + codeStream.Write(buffer); + + if (jump.IsConditional) + { + assembler.Jcc(jump.Condition, jump.Offset); + } + else + { + assembler.Jmp(jump.Offset); + } + } + + // Write remaining relocations. This case happens when there are no jumps assembled. + for (; relocIndex < relocEntries.Length; relocIndex++) + { + ref Reloc reloc = ref relocs[relocIndex]; + + relocEntries[relocIndex] = new RelocEntry(reloc.Position + relocOffset, reloc.Symbol); + } + + _stream.CopyTo(codeStream); + + var code = codeStream.ToArray(); + var relocInfo = new RelocInfo(relocEntries); + + return (code, relocInfo); + } + + private static bool Is64Bits(OperandType type) + { + return type == OperandType.I64 || type == OperandType.FP64; + } + + private static bool IsImm8(ulong immediate, OperandType type) + { + long value = type == OperandType.I32 ? (int)immediate : (long)immediate; + + return ConstFitsOnS8(value); + } + + private static bool IsImm32(ulong immediate, OperandType type) + { + long value = type == OperandType.I32 ? (int)immediate : (long)immediate; + + return ConstFitsOnS32(value); + } + + private static int GetJccLength(long offset) + { + if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset)) + { + return 2; + } + else if (ConstFitsOnS32(offset < 0 ? offset - 6 : offset)) + { + return 6; + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + private static int GetJmpLength(long offset) + { + if (ConstFitsOnS8(offset < 0 ? offset - 2 : offset)) + { + return 2; + } + else if (ConstFitsOnS32(offset < 0 ? 
offset - 5 : offset)) + { + return 5; + } + else + { + throw new ArgumentOutOfRangeException(nameof(offset)); + } + } + + private static bool ConstFitsOnS8(long value) + { + return value == (sbyte)value; + } + + private static bool ConstFitsOnS32(long value) + { + return value == (int)value; + } + + private void WriteInt16(short value) + { + WriteUInt16((ushort)value); + } + + private void WriteInt32(int value) + { + WriteUInt32((uint)value); + } + + private void WriteByte(byte value) + { + _stream.WriteByte(value); + } + + private void WriteUInt16(ushort value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + } + + private void WriteUInt32(uint value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + } + + private void WriteUInt64(ulong value) + { + _stream.WriteByte((byte)(value >> 0)); + _stream.WriteByte((byte)(value >> 8)); + _stream.WriteByte((byte)(value >> 16)); + _stream.WriteByte((byte)(value >> 24)); + _stream.WriteByte((byte)(value >> 32)); + _stream.WriteByte((byte)(value >> 40)); + _stream.WriteByte((byte)(value >> 48)); + _stream.WriteByte((byte)(value >> 56)); + } + } +}
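Editor's aside — a minimal usage sketch of this assembler (not part of the patch; it assumes the Operand.Factory helpers Label/Const/Register and the X86Condition values have the shapes referenced elsewhere in ARMeilleure):

    using var stream = new MemoryStream();
    var asm = new Assembler(stream, relocatable: false);

    Operand label = Operand.Factory.Label();
    Operand rax = Operand.Factory.Register((int)X86Register.Rax, RegisterType.Integer, OperandType.I64);

    asm.Cmp(rax, Operand.Factory.Const(0), OperandType.I64);
    asm.Jcc(X86Condition.Equal, label);                  // reserves a 1-byte placeholder
    asm.Add(rax, Operand.Factory.Const(1), OperandType.I64);
    asm.MarkLabel(label);
    asm.Return();

    (byte[] code, RelocInfo relocInfo) = asm.GetCode();  // placeholders patched to real jumps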
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs new file mode 100644 index 00000000..e6a2ff07 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/AssemblerTable.cs @@ -0,0 +1,295 @@ +using System; + +namespace ARMeilleure.CodeGen.X86 +{ + partial class Assembler + { + public static bool SupportsVexPrefix(X86Instruction inst) + { + return _instTable[(int)inst].Flags.HasFlag(InstructionFlags.Vex); + } + + private const int BadOp = 0; + + [Flags] + private enum InstructionFlags + { + None = 0, + RegOnly = 1 << 0, + Reg8Src = 1 << 1, + Reg8Dest = 1 << 2, + RexW = 1 << 3, + Vex = 1 << 4, + Evex = 1 << 5, + + PrefixBit = 16, + PrefixMask = 7 << PrefixBit, + Prefix66 = 1 << PrefixBit, + PrefixF3 = 2 << PrefixBit, + PrefixF2 = 4 << PrefixBit + } + + private readonly struct InstructionInfo + { + public int OpRMR { get; } + public int OpRMImm8 { get; } + public int OpRMImm32 { get; } + public int OpRImm64 { get; } + public int OpRRM { get; } + + public InstructionFlags Flags { get; } + + public InstructionInfo( + int opRMR, + int opRMImm8, + int opRMImm32, + int opRImm64, + int opRRM, + InstructionFlags flags) + { + OpRMR = opRMR; + OpRMImm8 = opRMImm8; + OpRMImm32 = opRMImm32; + OpRImm64 = opRImm64; + OpRRM = opRRM; + Flags = flags; + } + } + + private readonly static InstructionInfo[] _instTable; + + static Assembler() + { + _instTable = new InstructionInfo[(int)X86Instruction.Count]; + + // Name RM/R RM/I8 RM/I32 R/I64 R/RM Flags + Add(X86Instruction.Add, new InstructionInfo(0x00000001, 0x00000083, 0x00000081, BadOp, 0x00000003, InstructionFlags.None)); + Add(X86Instruction.Addpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Addps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex)); + Add(X86Instruction.Addsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Addss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f58, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Aesdec, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38de, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Aesdeclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38df, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Aesenc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dc, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Aesenclast, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38dd, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Aesimc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38db, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.And, new InstructionInfo(0x00000021, 0x04000083, 0x04000081, BadOp, 0x00000023, InstructionFlags.None)); + Add(X86Instruction.Andnpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Andnps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f55, InstructionFlags.Vex)); + Add(X86Instruction.Andpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Andps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f54, InstructionFlags.Vex)); + 
Add(X86Instruction.Blendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3815, InstructionFlags.Prefix66)); + Add(X86Instruction.Blendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3814, InstructionFlags.Prefix66)); + Add(X86Instruction.Bsr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbd, InstructionFlags.None)); + Add(X86Instruction.Bswap, new InstructionInfo(0x00000fc8, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RegOnly)); + Add(X86Instruction.Call, new InstructionInfo(0x020000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Cmovcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f40, InstructionFlags.None)); + Add(X86Instruction.Cmp, new InstructionInfo(0x00000039, 0x07000083, 0x07000081, BadOp, 0x0000003b, InstructionFlags.None)); + Add(X86Instruction.Cmppd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cmpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex)); + Add(X86Instruction.Cmpsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cmpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc2, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cmpxchg, new InstructionInfo(0x00000fb1, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Cmpxchg16b, new InstructionInfo(0x01000fc7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.RexW)); + Add(X86Instruction.Cmpxchg8, new InstructionInfo(0x00000fb0, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Reg8Src)); + Add(X86Instruction.Comisd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Comiss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2f, InstructionFlags.Vex)); + Add(X86Instruction.Crc32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2)); + Add(X86Instruction.Crc32_16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f1, InstructionFlags.PrefixF2 | InstructionFlags.Prefix66)); + Add(X86Instruction.Crc32_8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38f0, InstructionFlags.PrefixF2 | InstructionFlags.Reg8Src)); + Add(X86Instruction.Cvtdq2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtdq2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex)); + Add(X86Instruction.Cvtpd2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe6, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtpd2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cvtps2dq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Cvtps2pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex)); + Add(X86Instruction.Cvtsd2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsd2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsi2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, 
InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Cvtsi2ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2a, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtss2sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5a, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Cvtss2si, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f2d, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Div, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x060000f7, InstructionFlags.None)); + Add(X86Instruction.Divpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Divps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex)); + Add(X86Instruction.Divsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Divss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5e, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Gf2p8affineqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3ace, InstructionFlags.Prefix66)); + Add(X86Instruction.Haddpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Haddps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Idiv, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x070000f7, InstructionFlags.None)); + Add(X86Instruction.Imul, new InstructionInfo(BadOp, 0x0000006b, 0x00000069, BadOp, 0x00000faf, InstructionFlags.None)); + Add(X86Instruction.Imul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x050000f7, InstructionFlags.None)); + Add(X86Instruction.Insertps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a21, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Jmp, new InstructionInfo(0x040000ff, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Ldmxcsr, new InstructionInfo(0x02000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex)); + Add(X86Instruction.Lea, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x0000008d, InstructionFlags.None)); + Add(X86Instruction.Maxpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Maxps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex)); + Add(X86Instruction.Maxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Maxss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5f, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Minpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Minps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex)); + Add(X86Instruction.Minsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Minss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5d, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Mov, new InstructionInfo(0x00000089, BadOp, 0x000000c7, 0x000000b8, 0x0000008b, InstructionFlags.None)); + 
Add(X86Instruction.Mov16, new InstructionInfo(0x00000089, BadOp, 0x000000c7, BadOp, 0x0000008b, InstructionFlags.Prefix66)); + Add(X86Instruction.Mov8, new InstructionInfo(0x00000088, 0x000000c6, BadOp, BadOp, 0x0000008a, InstructionFlags.Reg8Src | InstructionFlags.Reg8Dest)); + Add(X86Instruction.Movd, new InstructionInfo(0x00000f7e, BadOp, BadOp, BadOp, 0x00000f6e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Movdqu, new InstructionInfo(0x00000f7f, BadOp, BadOp, BadOp, 0x00000f6f, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movhlps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f12, InstructionFlags.Vex)); + Add(X86Instruction.Movlhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f16, InstructionFlags.Vex)); + Add(X86Instruction.Movq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f7e, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movsd, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Movss, new InstructionInfo(0x00000f11, BadOp, BadOp, BadOp, 0x00000f10, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Movsx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbf, InstructionFlags.None)); + Add(X86Instruction.Movsx32, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000063, InstructionFlags.None)); + Add(X86Instruction.Movsx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fbe, InstructionFlags.Reg8Src)); + Add(X86Instruction.Movzx16, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb7, InstructionFlags.None)); + Add(X86Instruction.Movzx8, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb6, InstructionFlags.Reg8Src)); + Add(X86Instruction.Mul128, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x040000f7, InstructionFlags.None)); + Add(X86Instruction.Mulpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Mulps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex)); + Add(X86Instruction.Mulsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Mulss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f59, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Neg, new InstructionInfo(0x030000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Not, new InstructionInfo(0x020000f7, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Or, new InstructionInfo(0x00000009, 0x01000083, 0x01000081, BadOp, 0x0000000b, InstructionFlags.None)); + Add(X86Instruction.Paddb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffc, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffe, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd4, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Paddw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffd, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Palignr, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pand, new InstructionInfo(BadOp, 
BadOp, BadOp, BadOp, 0x00000fdb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pandn, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fdf, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pavgb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe0, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pavgw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fe3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3810, InstructionFlags.Prefix66)); + Add(X86Instruction.Pclmulqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a44, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f74, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f76, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3829, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpeqw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f75, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f64, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f66, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3837, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pcmpgtw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f65, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrb, new InstructionInfo(0x000f3a14, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrd, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrq, new InstructionInfo(0x000f3a16, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66)); + Add(X86Instruction.Pextrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc5, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a20, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a22, InstructionFlags.Vex | InstructionFlags.RexW | InstructionFlags.Prefix66)); + Add(X86Instruction.Pinsrw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc4, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fee, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxub, new 
InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fde, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383f, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmaxuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383e, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3838, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3839, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminsw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fea, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminub, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fda, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminud, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pminuw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f383a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3820, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3825, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovsxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3823, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3830, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3835, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmovzxwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3833, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmulld, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3840, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pmullw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fd5, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pop, new InstructionInfo(0x0000008f, BadOp, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Popcnt, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fb8, InstructionFlags.PrefixF3)); + Add(X86Instruction.Por, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000feb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pshufb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3800, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pshufd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f70, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pslld, new InstructionInfo(BadOp, 0x06000f72, BadOp, BadOp, 0x00000ff2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Pslldq, new InstructionInfo(BadOp, 0x07000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psllq, new InstructionInfo(BadOp, 0x06000f73, BadOp, BadOp, 0x00000ff3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psllw, new InstructionInfo(BadOp, 0x06000f71, BadOp, BadOp, 0x00000ff1, 
InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrad, new InstructionInfo(BadOp, 0x04000f72, BadOp, BadOp, 0x00000fe2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psraw, new InstructionInfo(BadOp, 0x04000f71, BadOp, BadOp, 0x00000fe1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrld, new InstructionInfo(BadOp, 0x02000f72, BadOp, BadOp, 0x00000fd2, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrlq, new InstructionInfo(BadOp, 0x02000f73, BadOp, BadOp, 0x00000fd3, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrldq, new InstructionInfo(BadOp, 0x03000f73, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psrlw, new InstructionInfo(BadOp, 0x02000f71, BadOp, BadOp, 0x00000fd1, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff8, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffa, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ffb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Psubw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000ff9, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f68, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6d, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckhwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f69, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklbw, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f60, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpckldq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f62, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklqdq, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f6c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Punpcklwd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f61, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Push, new InstructionInfo(BadOp, 0x0000006a, 0x00000068, BadOp, 0x060000ff, InstructionFlags.None)); + Add(X86Instruction.Pxor, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fef, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Rcpps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex)); + Add(X86Instruction.Rcpss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f53, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Ror, new InstructionInfo(0x010000d3, 0x010000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Roundpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a09, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a08, InstructionFlags.Vex | InstructionFlags.Prefix66)); + 
Add(X86Instruction.Roundsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Roundss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a0a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Rsqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex)); + Add(X86Instruction.Rsqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f52, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Sar, new InstructionInfo(0x070000d3, 0x070000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Setcc, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f90, InstructionFlags.Reg8Dest)); + Add(X86Instruction.Sha256Msg1, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cc, InstructionFlags.None)); + Add(X86Instruction.Sha256Msg2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cd, InstructionFlags.None)); + Add(X86Instruction.Sha256Rnds2, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38cb, InstructionFlags.None)); + Add(X86Instruction.Shl, new InstructionInfo(0x040000d3, 0x040000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Shr, new InstructionInfo(0x050000d3, 0x050000c1, BadOp, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Shufpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Shufps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000fc6, InstructionFlags.Vex)); + Add(X86Instruction.Sqrtpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Sqrtps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex)); + Add(X86Instruction.Sqrtsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Sqrtss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f51, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Stmxcsr, new InstructionInfo(0x03000fae, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex)); + Add(X86Instruction.Sub, new InstructionInfo(0x00000029, 0x05000083, 0x05000081, BadOp, 0x0000002b, InstructionFlags.None)); + Add(X86Instruction.Subpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Subps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex)); + Add(X86Instruction.Subsd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF2)); + Add(X86Instruction.Subss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f5c, InstructionFlags.Vex | InstructionFlags.PrefixF3)); + Add(X86Instruction.Test, new InstructionInfo(0x00000085, BadOp, 0x000000f7, BadOp, BadOp, InstructionFlags.None)); + Add(X86Instruction.Unpckhpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Unpckhps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f15, InstructionFlags.Vex)); + Add(X86Instruction.Unpcklpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Unpcklps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f14, InstructionFlags.Vex)); + 
Add(X86Instruction.Vblendvpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4b, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vblendvps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4a, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vcvtph2ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3813, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vcvtps2ph, new InstructionInfo(0x000f3a1d, BadOp, BadOp, BadOp, BadOp, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b8, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38b9, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bb, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfnmadd231pd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfnmadd231ps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bc, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfnmadd231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfnmadd231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bd, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vfnmsub231sd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66 | InstructionFlags.RexW)); + Add(X86Instruction.Vfnmsub231ss, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f38bf, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vpblendvb, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a4c, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Vpternlogd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x000f3a25, InstructionFlags.Evex | InstructionFlags.Prefix66)); + Add(X86Instruction.Xor, new InstructionInfo(0x00000031, 0x06000083, 0x06000081, BadOp, 0x00000033, InstructionFlags.None)); + Add(X86Instruction.Xorpd, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex | InstructionFlags.Prefix66)); + Add(X86Instruction.Xorps, new InstructionInfo(BadOp, BadOp, BadOp, BadOp, 0x00000f57, InstructionFlags.Vex)); + + static void Add(X86Instruction inst, in InstructionInfo info) + { + _instTable[(int)inst] = info; + } + } + } +} diff --git a/src/ARMeilleure/CodeGen/X86/CallConvName.cs b/src/ARMeilleure/CodeGen/X86/CallConvName.cs new file mode 100644 index 00000000..be367628 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/CallConvName.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.CodeGen.X86 +{ + enum CallConvName + { + 
SystemV, + Windows + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/CallingConvention.cs b/src/ARMeilleure/CodeGen/X86/CallingConvention.cs new file mode 100644 index 00000000..953fef5b --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/CallingConvention.cs @@ -0,0 +1,158 @@ +using System; + +namespace ARMeilleure.CodeGen.X86 +{ + static class CallingConvention + { + private const int RegistersMask = 0xffff; + + public static int GetIntAvailableRegisters() + { + return RegistersMask & ~(1 << (int)X86Register.Rsp); + } + + public static int GetVecAvailableRegisters() + { + return RegistersMask; + } + + public static int GetIntCallerSavedRegisters() + { + if (GetCurrentCallConv() == CallConvName.Windows) + { + return (1 << (int)X86Register.Rax) | + (1 << (int)X86Register.Rcx) | + (1 << (int)X86Register.Rdx) | + (1 << (int)X86Register.R8) | + (1 << (int)X86Register.R9) | + (1 << (int)X86Register.R10) | + (1 << (int)X86Register.R11); + } + else /* if (GetCurrentCallConv() == CallConvName.SystemV) */ + { + return (1 << (int)X86Register.Rax) | + (1 << (int)X86Register.Rcx) | + (1 << (int)X86Register.Rdx) | + (1 << (int)X86Register.Rsi) | + (1 << (int)X86Register.Rdi) | + (1 << (int)X86Register.R8) | + (1 << (int)X86Register.R9) | + (1 << (int)X86Register.R10) | + (1 << (int)X86Register.R11); + } + } + + public static int GetVecCallerSavedRegisters() + { + if (GetCurrentCallConv() == CallConvName.Windows) + { + return (1 << (int)X86Register.Xmm0) | + (1 << (int)X86Register.Xmm1) | + (1 << (int)X86Register.Xmm2) | + (1 << (int)X86Register.Xmm3) | + (1 << (int)X86Register.Xmm4) | + (1 << (int)X86Register.Xmm5); + } + else /* if (GetCurrentCallConv() == CallConvName.SystemV) */ + { + return RegistersMask; + } + } + + public static int GetIntCalleeSavedRegisters() + { + return GetIntCallerSavedRegisters() ^ RegistersMask; + } + + public static int GetVecCalleeSavedRegisters() + { + return GetVecCallerSavedRegisters() ^ RegistersMask; + } + + public static int GetArgumentsOnRegsCount() + { + return 4; + } + + public static int GetIntArgumentsOnRegsCount() + { + return 6; + } + + public static int GetVecArgumentsOnRegsCount() + { + return 8; + } + + public static X86Register GetIntArgumentRegister(int index) + { + if (GetCurrentCallConv() == CallConvName.Windows) + { + switch (index) + { + case 0: return X86Register.Rcx; + case 1: return X86Register.Rdx; + case 2: return X86Register.R8; + case 3: return X86Register.R9; + } + } + else /* if (GetCurrentCallConv() == CallConvName.SystemV) */ + { + switch (index) + { + case 0: return X86Register.Rdi; + case 1: return X86Register.Rsi; + case 2: return X86Register.Rdx; + case 3: return X86Register.Rcx; + case 4: return X86Register.R8; + case 5: return X86Register.R9; + } + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public static X86Register GetVecArgumentRegister(int index) + { + int count; + + if (GetCurrentCallConv() == CallConvName.Windows) + { + count = 4; + } + else /* if (GetCurrentCallConv() == CallConvName.SystemV) */ + { + count = 8; + } + + if ((uint)index < count) + { + return X86Register.Xmm0 + index; + } + + throw new ArgumentOutOfRangeException(nameof(index)); + } + + public static X86Register GetIntReturnRegister() + { + return X86Register.Rax; + } + + public static X86Register GetIntReturnRegisterHigh() + { + return X86Register.Rdx; + } + + public static X86Register GetVecReturnRegister() + { + return X86Register.Xmm0; + } + + public static CallConvName GetCurrentCallConv() + { + return OperatingSystem.IsWindows() + ? 
CallConvName.Windows + : CallConvName.SystemV; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs b/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs new file mode 100644 index 00000000..237ecee4 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/CodeGenCommon.cs @@ -0,0 +1,19 @@ +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.X86 +{ + static class CodeGenCommon + { + public static bool IsLongConst(Operand op) + { + long value = op.Type == OperandType.I32 ? op.AsInt32() : op.AsInt64(); + + return !ConstFitsOnS32(value); + } + + private static bool ConstFitsOnS32(long value) + { + return value == (int)value; + } + } +} diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs b/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs new file mode 100644 index 00000000..89948724 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/CodeGenContext.cs @@ -0,0 +1,105 @@ +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.IntermediateRepresentation; +using Ryujinx.Common.Memory; +using System.IO; +using System.Numerics; + +namespace ARMeilleure.CodeGen.X86 +{ + class CodeGenContext + { + private readonly Stream _stream; + private readonly Operand[] _blockLabels; + + public int StreamOffset => (int)_stream.Length; + + public AllocationResult AllocResult { get; } + + public Assembler Assembler { get; } + public BasicBlock CurrBlock { get; private set; } + + public int CallArgsRegionSize { get; } + public int XmmSaveRegionSize { get; } + + public CodeGenContext(AllocationResult allocResult, int maxCallArgs, int blocksCount, bool relocatable) + { + _stream = MemoryStreamManager.Shared.GetStream(); + _blockLabels = new Operand[blocksCount]; + + AllocResult = allocResult; + Assembler = new Assembler(_stream, relocatable); + + CallArgsRegionSize = GetCallArgsRegionSize(allocResult, maxCallArgs, out int xmmSaveRegionSize); + XmmSaveRegionSize = xmmSaveRegionSize; + } + + private static int GetCallArgsRegionSize(AllocationResult allocResult, int maxCallArgs, out int xmmSaveRegionSize) + { + // We need to add 8 bytes to the total size, as the call to this function already pushed 8 bytes (the + // return address). + int intMask = CallingConvention.GetIntCalleeSavedRegisters() & allocResult.IntUsedRegisters; + int vecMask = CallingConvention.GetVecCalleeSavedRegisters() & allocResult.VecUsedRegisters; + + xmmSaveRegionSize = BitOperations.PopCount((uint)vecMask) * 16; + + int calleeSaveRegionSize = BitOperations.PopCount((uint)intMask) * 8 + xmmSaveRegionSize + 8; + + int argsCount = maxCallArgs; + + if (argsCount < 0) + { + // When the function has no calls, argsCount is -1. In this case, we don't need to allocate the shadow + // space. + argsCount = 0; + } + else if (argsCount < 4) + { + // The ABI mandates that the space for at least 4 arguments is reserved on the stack (this is called + // shadow space). + argsCount = 4; + } + + // TODO: Align XMM save region to 16 bytes because unwinding on Windows requires it. + int frameSize = calleeSaveRegionSize + allocResult.SpillRegionSize; + + // TODO: Instead of always multiplying by 16 (the largest possible size of a variable, since a V128 has 16 + // bytes), we should calculate the exact size consumed by the arguments passed to the called functions on + // the stack. + int callArgsAndFrameSize = frameSize + argsCount * 16; + + // Ensure that the Stack Pointer will be aligned to 16 bytes. 
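+            // For example, with frameSize = 0x28 and argsCount = 4 this gives
+            // 0x28 + 4 * 16 = 0x68 bytes, which rounds up to (0x68 + 0xf) & ~0xf = 0x70,
+            // so 0x70 - 0x28 = 0x48 bytes are returned for the call arguments region.
+            // (Illustrative values only, not taken from a real compilation.)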
+ callArgsAndFrameSize = (callArgsAndFrameSize + 0xf) & ~0xf; + + return callArgsAndFrameSize - frameSize; + } + + public void EnterBlock(BasicBlock block) + { + Assembler.MarkLabel(GetLabel(block)); + + CurrBlock = block; + } + + public void JumpTo(BasicBlock target) + { + Assembler.Jmp(GetLabel(target)); + } + + public void JumpTo(X86Condition condition, BasicBlock target) + { + Assembler.Jcc(condition, GetLabel(target)); + } + + private Operand GetLabel(BasicBlock block) + { + ref Operand label = ref _blockLabels[block.Index]; + + if (label == default) + { + label = Operand.Factory.Label(); + } + + return label; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs new file mode 100644 index 00000000..e7179b51 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/CodeGenerator.cs @@ -0,0 +1,1865 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Common; +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Numerics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.CodeGen.X86 +{ + static class CodeGenerator + { + private const int RegistersCount = 16; + private const int PageSize = 0x1000; + private const int StackGuardSize = 0x2000; + + private static readonly Action<CodeGenContext, Operation>[] _instTable; + + static CodeGenerator() + { + _instTable = new Action<CodeGenContext, Operation>[EnumUtils.GetCount(typeof(Instruction))]; + + Add(Instruction.Add, GenerateAdd); + Add(Instruction.BitwiseAnd, GenerateBitwiseAnd); + Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr); + Add(Instruction.BitwiseNot, GenerateBitwiseNot); + Add(Instruction.BitwiseOr, GenerateBitwiseOr); + Add(Instruction.BranchIf, GenerateBranchIf); + Add(Instruction.ByteSwap, GenerateByteSwap); + Add(Instruction.Call, GenerateCall); + Add(Instruction.Clobber, GenerateClobber); + Add(Instruction.Compare, GenerateCompare); + Add(Instruction.CompareAndSwap, GenerateCompareAndSwap); + Add(Instruction.CompareAndSwap16, GenerateCompareAndSwap16); + Add(Instruction.CompareAndSwap8, GenerateCompareAndSwap8); + Add(Instruction.ConditionalSelect, GenerateConditionalSelect); + Add(Instruction.ConvertI64ToI32, GenerateConvertI64ToI32); + Add(Instruction.ConvertToFP, GenerateConvertToFP); + Add(Instruction.Copy, GenerateCopy); + Add(Instruction.CountLeadingZeros, GenerateCountLeadingZeros); + Add(Instruction.Divide, GenerateDivide); + Add(Instruction.DivideUI, GenerateDivideUI); + Add(Instruction.Fill, GenerateFill); + Add(Instruction.Load, GenerateLoad); + Add(Instruction.Load16, GenerateLoad16); + Add(Instruction.Load8, GenerateLoad8); + Add(Instruction.MemoryBarrier, GenerateMemoryBarrier); + Add(Instruction.Multiply, GenerateMultiply); + Add(Instruction.Multiply64HighSI, GenerateMultiply64HighSI); + Add(Instruction.Multiply64HighUI, GenerateMultiply64HighUI); + Add(Instruction.Negate, GenerateNegate); + Add(Instruction.Return, GenerateReturn); + Add(Instruction.RotateRight, GenerateRotateRight); + Add(Instruction.ShiftLeft, GenerateShiftLeft); + Add(Instruction.ShiftRightSI, GenerateShiftRightSI); + Add(Instruction.ShiftRightUI, GenerateShiftRightUI); + Add(Instruction.SignExtend16, GenerateSignExtend16); + Add(Instruction.SignExtend32, GenerateSignExtend32); + Add(Instruction.SignExtend8, GenerateSignExtend8); + Add(Instruction.Spill, GenerateSpill); + Add(Instruction.SpillArg, GenerateSpillArg); + Add(Instruction.StackAlloc, GenerateStackAlloc); + Add(Instruction.Store, GenerateStore); + Add(Instruction.Store16, GenerateStore16); + Add(Instruction.Store8, GenerateStore8); + Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.Tailcall, GenerateTailcall); + Add(Instruction.VectorCreateScalar, GenerateVectorCreateScalar); + Add(Instruction.VectorExtract, GenerateVectorExtract); + Add(Instruction.VectorExtract16, 
GenerateVectorExtract16); + Add(Instruction.VectorExtract8, GenerateVectorExtract8); + Add(Instruction.VectorInsert, GenerateVectorInsert); + Add(Instruction.VectorInsert16, GenerateVectorInsert16); + Add(Instruction.VectorInsert8, GenerateVectorInsert8); + Add(Instruction.VectorOne, GenerateVectorOne); + Add(Instruction.VectorZero, GenerateVectorZero); + Add(Instruction.VectorZeroUpper64, GenerateVectorZeroUpper64); + Add(Instruction.VectorZeroUpper96, GenerateVectorZeroUpper96); + Add(Instruction.ZeroExtend16, GenerateZeroExtend16); + Add(Instruction.ZeroExtend32, GenerateZeroExtend32); + Add(Instruction.ZeroExtend8, GenerateZeroExtend8); + + static void Add(Instruction inst, Action<CodeGenContext, Operation> func) + { + _instTable[(int)inst] = func; + } + } + + public static CompiledFunction Generate(CompilerContext cctx) + { + ControlFlowGraph cfg = cctx.Cfg; + + Logger.StartPass(PassName.Optimization); + + if (cctx.Options.HasFlag(CompilerOptions.Optimize)) + { + if (cctx.Options.HasFlag(CompilerOptions.SsaForm)) + { + Optimizer.RunPass(cfg); + } + + BlockPlacement.RunPass(cfg); + } + + X86Optimizer.RunPass(cfg); + + Logger.EndPass(PassName.Optimization, cfg); + + Logger.StartPass(PassName.PreAllocation); + + StackAllocator stackAlloc = new(); + + PreAllocator.RunPass(cctx, stackAlloc, out int maxCallArgs); + + Logger.EndPass(PassName.PreAllocation, cfg); + + Logger.StartPass(PassName.RegisterAllocation); + + if (cctx.Options.HasFlag(CompilerOptions.SsaForm)) + { + Ssa.Deconstruct(cfg); + } + + IRegisterAllocator regAlloc; + + if (cctx.Options.HasFlag(CompilerOptions.Lsra)) + { + regAlloc = new LinearScanAllocator(); + } + else + { + regAlloc = new HybridAllocator(); + } + + RegisterMasks regMasks = new( + CallingConvention.GetIntAvailableRegisters(), + CallingConvention.GetVecAvailableRegisters(), + CallingConvention.GetIntCallerSavedRegisters(), + CallingConvention.GetVecCallerSavedRegisters(), + CallingConvention.GetIntCalleeSavedRegisters(), + CallingConvention.GetVecCalleeSavedRegisters(), + RegistersCount); + + AllocationResult allocResult = regAlloc.RunPass(cfg, stackAlloc, regMasks); + + Logger.EndPass(PassName.RegisterAllocation, cfg); + + Logger.StartPass(PassName.CodeGeneration); + + bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0; + + CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable); + + UnwindInfo unwindInfo = WritePrologue(context); + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + context.EnterBlock(block); + + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + GenerateOperation(context, node); + } + + if (block.SuccessorsCount == 0) + { + // The only blocks which can have 0 successors are exit blocks. 
+ Operation last = block.Operations.Last; + + Debug.Assert(last.Instruction == Instruction.Tailcall || + last.Instruction == Instruction.Return); + } + else + { + BasicBlock succ = block.GetSuccessor(0); + + if (succ != block.ListNext) + { + context.JumpTo(succ); + } + } + } + + (byte[] code, RelocInfo relocInfo) = context.Assembler.GetCode(); + + Logger.EndPass(PassName.CodeGeneration); + + return new CompiledFunction(code, unwindInfo, relocInfo); + } + + private static void GenerateOperation(CodeGenContext context, Operation operation) + { + if (operation.Instruction == Instruction.Extended) + { + IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic); + + switch (info.Type) + { + case IntrinsicType.Comis_: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + switch (operation.Intrinsic) + { + case Intrinsic.X86Comisdeq: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Equal); + break; + + case Intrinsic.X86Comisdge: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.AboveOrEqual); + break; + + case Intrinsic.X86Comisdlt: + context.Assembler.Comisd(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Below); + break; + + case Intrinsic.X86Comisseq: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Equal); + break; + + case Intrinsic.X86Comissge: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.AboveOrEqual); + break; + + case Intrinsic.X86Comisslt: + context.Assembler.Comiss(src1, src2); + context.Assembler.Setcc(dest, X86Condition.Below); + break; + } + + context.Assembler.Movzx8(dest, dest, OperandType.I32); + + break; + } + + case IntrinsicType.Mxcsr: + { + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + Debug.Assert(offset.Type == OperandType.I32); + + int offs = offset.AsInt32() + context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, offs); + + Debug.Assert(HardwareCapabilities.SupportsSse || HardwareCapabilities.SupportsVexEncoding); + + if (operation.Intrinsic == Intrinsic.X86Ldmxcsr) + { + Operand bits = operation.GetSource(1); + Debug.Assert(bits.Type == OperandType.I32); + + context.Assembler.Mov(memOp, bits, OperandType.I32); + context.Assembler.Ldmxcsr(memOp); + } + else if (operation.Intrinsic == Intrinsic.X86Stmxcsr) + { + Operand dest = operation.Destination; + Debug.Assert(dest.Type == OperandType.I32); + + context.Assembler.Stmxcsr(memOp); + context.Assembler.Mov(dest, memOp, OperandType.I32); + } + + break; + } + + case IntrinsicType.PopCount: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Popcnt(dest, source, dest.Type); + + break; + } + + case IntrinsicType.Unary: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + EnsureSameType(dest, source); + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, source); + + break; + } + + case IntrinsicType.UnaryToGpr: + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && !source.Type.IsInteger()); + + if (operation.Intrinsic == Intrinsic.X86Cvtsi2si) + { + if (dest.Type == 
OperandType.I32) + { + context.Assembler.Movd(dest, source); // int _mm_cvtsi128_si32(__m128i a) + } + else /* if (dest.Type == OperandType.I64) */ + { + context.Assembler.Movq(dest, source); // __int64 _mm_cvtsi128_si64(__m128i a) + } + } + else + { + context.Assembler.WriteInstruction(info.Inst, dest, source, dest.Type); + } + + break; + } + + case IntrinsicType.Binary: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger()); + Debug.Assert(!src2.Type.IsInteger() || src2.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); + + break; + } + + case IntrinsicType.BinaryGpr: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src2.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src2.Type); + + break; + } + + case IntrinsicType.Crc32: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameReg(dest, src1); + + Debug.Assert(dest.Type.IsInteger() && src1.Type.IsInteger() && src2.Type.IsInteger()); + + context.Assembler.WriteInstruction(info.Inst, dest, src2, dest.Type); + + break; + } + + case IntrinsicType.BinaryImm: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + EnsureSameType(dest, src1); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src2.Kind == OperandKind.Constant); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2.AsByte()); + + break; + } + + case IntrinsicType.Ternary: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src1, src2, src3); + + Debug.Assert(!dest.Type.IsInteger()); + + if (info.Inst == X86Instruction.Blendvpd && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vblendvpd, dest, src1, src2, src3); + } + else if (info.Inst == X86Instruction.Blendvps && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vblendvps, dest, src1, src2, src3); + } + else if (info.Inst == X86Instruction.Pblendvb && HardwareCapabilities.SupportsVexEncoding) + { + context.Assembler.WriteInstruction(X86Instruction.Vpblendvb, dest, src1, src2, src3); + } + else + { + EnsureSameReg(dest, src1); + + Debug.Assert(src3.GetRegister().Index == 0); + + context.Assembler.WriteInstruction(info.Inst, dest, src1, src2); + } + + break; + } + + case IntrinsicType.TernaryImm: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(dest, src1, src2); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(!dest.Type.IsInteger() && src3.Kind == OperandKind.Constant); + + 
context.Assembler.WriteInstruction(info.Inst, dest, src1, src2, src3.AsByte()); + + break; + } + + case IntrinsicType.Fma: + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + Debug.Assert(HardwareCapabilities.SupportsVexEncoding); + + Debug.Assert(dest.Kind == OperandKind.Register && src1.Kind == OperandKind.Register && src2.Kind == OperandKind.Register); + Debug.Assert(src3.Kind == OperandKind.Register || src3.Kind == OperandKind.Memory); + + EnsureSameType(dest, src1, src2, src3); + Debug.Assert(dest.Type == OperandType.V128); + + Debug.Assert(dest.Value == src1.Value); + + context.Assembler.WriteInstruction(info.Inst, dest, src2, src3); + + break; + } + } + } + else + { + Action<CodeGenContext, Operation> func = _instTable[(int)operation.Instruction]; + + if (func != null) + { + func(context, operation); + } + else + { + throw new ArgumentException($"Invalid instruction \"{operation.Instruction}\"."); + } + } + } + + private static void GenerateAdd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + if (dest.Type.IsInteger()) + { + // If Destination and Source 1 Operands are the same, perform a standard add as there are no benefits to using LEA. + if (dest.Kind == src1.Kind && dest.Value == src1.Value) + { + ValidateBinOp(dest, src1, src2); + + context.Assembler.Add(dest, src2, dest.Type); + } + else + { + EnsureSameType(dest, src1, src2); + + int offset; + Operand index; + + if (src2.Kind == OperandKind.Constant) + { + offset = src2.AsInt32(); + index = default; + } + else + { + offset = 0; + index = src2; + } + + Operand memOp = MemoryOp(dest.Type, src1, index, Multiplier.x1, offset); + + context.Assembler.Lea(dest, memOp, dest.Type); + } + } + else + { + ValidateBinOp(dest, src1, src2); + + if (dest.Type == OperandType.FP32) + { + context.Assembler.Addss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Addsd(dest, src1, src2); + } + } + } + + private static void GenerateBitwiseAnd(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + Debug.Assert(dest.Type.IsInteger()); + + // Note: GenerateCompareCommon makes the assumption that BitwiseAnd will emit only a single `and` + // instruction. 
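+            // If it ever emitted more than one instruction here, the status flags observed
+            // at the point where GenerateCompareCommon omits the `test` could come from the
+            // wrong instruction.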
+            context.Assembler.And(dest, src2, dest.Type);
+        }
+
+        private static void GenerateBitwiseExclusiveOr(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateBinOp(dest, src1, src2);
+
+            if (dest.Type.IsInteger())
+            {
+                context.Assembler.Xor(dest, src2, dest.Type);
+            }
+            else
+            {
+                context.Assembler.Xorps(dest, src1, src2);
+            }
+        }
+
+        private static void GenerateBitwiseNot(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            ValidateUnOp(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Not(dest);
+        }
+
+        private static void GenerateBitwiseOr(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            ValidateBinOp(dest, src1, src2);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Or(dest, src2, dest.Type);
+        }
+
+        private static void GenerateBranchIf(CodeGenContext context, Operation operation)
+        {
+            Operand comp = operation.GetSource(2);
+
+            Debug.Assert(comp.Kind == OperandKind.Constant);
+
+            var cond = ((Comparison)comp.AsInt32()).ToX86Condition();
+
+            GenerateCompareCommon(context, operation);
+
+            context.JumpTo(cond, context.CurrBlock.GetSuccessor(1));
+        }
+
+        private static void GenerateByteSwap(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            ValidateUnOp(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Bswap(dest);
+        }
+
+        private static void GenerateCall(CodeGenContext context, Operation operation)
+        {
+            context.Assembler.Call(operation.GetSource(0));
+        }
+
+        private static void GenerateClobber(CodeGenContext context, Operation operation)
+        {
+            // This is only used to indicate to the register allocator that a register is
+            // clobbered; we don't need to produce any code.
+        }
+
+        private static void GenerateCompare(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand comp = operation.GetSource(2);
+
+            Debug.Assert(dest.Type == OperandType.I32);
+            Debug.Assert(comp.Kind == OperandKind.Constant);
+
+            var cond = ((Comparison)comp.AsInt32()).ToX86Condition();
+
+            GenerateCompareCommon(context, operation);
+
+            context.Assembler.Setcc(dest, cond);
+            context.Assembler.Movzx8(dest, dest, OperandType.I32);
+        }
+
+        private static void GenerateCompareCommon(CodeGenContext context, Operation operation)
+        {
+            Operand src1 = operation.GetSource(0);
+            Operand src2 = operation.GetSource(1);
+
+            EnsureSameType(src1, src2);
+
+            Debug.Assert(src1.Type.IsInteger());
+
+            if (src2.Kind == OperandKind.Constant && src2.Value == 0)
+            {
+                if (MatchOperation(operation.ListPrevious, Instruction.BitwiseAnd, src1.Type, src1.GetRegister()))
+                {
+                    // Since the `test` and `and` instructions set the status flags in the same way, we can omit the
+                    // `test r,r` instruction when it is immediately preceded by an `and r,*` instruction.
+ // + // For example: + // + // and eax, 0x3 + // test eax, eax + // jz .L0 + // + // => + // + // and eax, 0x3 + // jz .L0 + } + else + { + context.Assembler.Test(src1, src1, src1.Type); + } + } + else + { + context.Assembler.Cmp(src1, src2, src1.Type); + } + } + + private static void GenerateCompareAndSwap(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + + if (operation.SourcesCount == 5) // CompareAndSwap128 has 5 sources, compared to CompareAndSwap64/32's 3. + { + Operand memOp = MemoryOp(OperandType.I64, src1); + + context.Assembler.Cmpxchg16b(memOp); + } + else + { + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(src2, src3); + + Operand memOp = MemoryOp(src3.Type, src1); + + context.Assembler.Cmpxchg(memOp, src3); + } + } + + private static void GenerateCompareAndSwap16(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(src2, src3); + + Operand memOp = MemoryOp(src3.Type, src1); + + context.Assembler.Cmpxchg16(memOp, src3); + } + + private static void GenerateCompareAndSwap8(CodeGenContext context, Operation operation) + { + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameType(src2, src3); + + Operand memOp = MemoryOp(src3.Type, src1); + + context.Assembler.Cmpxchg8(memOp, src3); + } + + private static void GenerateConditionalSelect(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + Operand src3 = operation.GetSource(2); + + EnsureSameReg (dest, src3); + EnsureSameType(dest, src2, src3); + + Debug.Assert(dest.Type.IsInteger()); + Debug.Assert(src1.Type == OperandType.I32); + + context.Assembler.Test (src1, src1, src1.Type); + context.Assembler.Cmovcc(dest, src2, dest.Type, X86Condition.NotEqual); + } + + private static void GenerateConvertI64ToI32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.I32 && source.Type == OperandType.I64); + + context.Assembler.Mov(dest, source, OperandType.I32); + } + + private static void GenerateConvertToFP(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || dest.Type == OperandType.FP64); + + if (dest.Type == OperandType.FP32) + { + Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP64); + + if (source.Type.IsInteger()) + { + context.Assembler.Xorps (dest, dest, dest); + context.Assembler.Cvtsi2ss(dest, dest, source, source.Type); + } + else /* if (source.Type == OperandType.FP64) */ + { + context.Assembler.Cvtsd2ss(dest, dest, source); + + GenerateZeroUpper96(context, dest, dest); + } + } + else /* if (dest.Type == OperandType.FP64) */ + { + Debug.Assert(source.Type.IsInteger() || source.Type == OperandType.FP32); + + if (source.Type.IsInteger()) + { + context.Assembler.Xorps (dest, dest, dest); + context.Assembler.Cvtsi2sd(dest, dest, source, source.Type); + } + else /* if (source.Type == OperandType.FP32) */ + { + context.Assembler.Cvtss2sd(dest, dest, source); + + GenerateZeroUpper64(context, dest, dest); + } + } + } + + private static void 
GenerateCopy(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            EnsureSameType(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger() || source.Kind != OperandKind.Constant);
+
+            // Moves to the same register are useless.
+            if (dest.Kind == source.Kind && dest.Value == source.Value)
+            {
+                return;
+            }
+
+            if (dest.Kind == OperandKind.Register &&
+                source.Kind == OperandKind.Constant && source.Value == 0)
+            {
+                // Assemble "mov reg, 0" as "xor reg, reg" as the latter is more efficient.
+                context.Assembler.Xor(dest, dest, OperandType.I32);
+            }
+            else if (dest.Type.IsInteger())
+            {
+                context.Assembler.Mov(dest, source, dest.Type);
+            }
+            else
+            {
+                context.Assembler.Movdqu(dest, source);
+            }
+        }
+
+        private static void GenerateCountLeadingZeros(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand source = operation.GetSource(0);
+
+            EnsureSameType(dest, source);
+
+            Debug.Assert(dest.Type.IsInteger());
+
+            context.Assembler.Bsr(dest, source, dest.Type);
+
+            int operandSize = dest.Type == OperandType.I32 ? 32 : 64;
+            int operandMask = operandSize - 1;
+
+            // When the input operand is 0, the result is undefined, but the
+            // ZF flag is set. We are supposed to return the operand size in
+            // that case, so add a conditional jump which, for a zero input,
+            // falls through and loads (operandSize | operandMask) into the
+            // destination register; the XOR below then turns that into
+            // operandSize.
+            Operand neLabel = Label();
+
+            context.Assembler.Jcc(X86Condition.NotEqual, neLabel);
+
+            context.Assembler.Mov(dest, Const(operandSize | operandMask), OperandType.I32);
+
+            context.Assembler.MarkLabel(neLabel);
+
+            // BSR returns the zero-based index of the most significant set bit,
+            // counted from the least significant bit, but we are supposed to
+            // return the number of 0 bits on the high end. So, we invert the
+            // result of the BSR using XOR to get the correct value.
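+            // For example, for the 32-bit input 0x00000010 BSR yields 4, and 4 ^ 31 = 27,
+            // the number of leading zeros. For a zero input the fall-through path above
+            // loaded 32 | 31 = 63, and 63 ^ 31 = 32, the operand size.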
+ context.Assembler.Xor(dest, Const(operandMask), OperandType.I32); + } + + private static void GenerateDivide(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand dividend = operation.GetSource(0); + Operand divisor = operation.GetSource(1); + + if (!dest.Type.IsInteger()) + { + ValidateBinOp(dest, dividend, divisor); + } + + if (dest.Type.IsInteger()) + { + divisor = operation.GetSource(2); + + EnsureSameType(dest, divisor); + + if (divisor.Type == OperandType.I32) + { + context.Assembler.Cdq(); + } + else + { + context.Assembler.Cqo(); + } + + context.Assembler.Idiv(divisor); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Divss(dest, dividend, divisor); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Divsd(dest, dividend, divisor); + } + } + + private static void GenerateDivideUI(CodeGenContext context, Operation operation) + { + Operand divisor = operation.GetSource(2); + + Operand rdx = Register(X86Register.Rdx); + + Debug.Assert(divisor.Type.IsInteger()); + + context.Assembler.Xor(rdx, rdx, OperandType.I32); + context.Assembler.Div(divisor); + } + + private static void GenerateFill(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + + Operand memOp = MemoryOp(dest.Type, rsp, default, Multiplier.x1, offs); + + GenerateLoad(context, memOp, dest); + } + + private static void GenerateLoad(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + GenerateLoad(context, address, value); + } + + private static void GenerateLoad16(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Movzx16(value, address, value.Type); + } + + private static void GenerateLoad8(CodeGenContext context, Operation operation) + { + Operand value = operation.Destination; + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Movzx8(value, address, value.Type); + } + + private static void GenerateMemoryBarrier(CodeGenContext context, Operation operation) + { + context.Assembler.LockOr(MemoryOp(OperandType.I64, Register(X86Register.Rsp)), Const(0), OperandType.I32); + } + + private static void GenerateMultiply(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + if (src2.Kind != OperandKind.Constant) + { + EnsureSameReg(dest, src1); + } + + EnsureSameType(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + if (src2.Kind == OperandKind.Constant) + { + context.Assembler.Imul(dest, src1, src2, dest.Type); + } + else + { + context.Assembler.Imul(dest, src2, dest.Type); + } + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Mulss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Mulsd(dest, src1, src2); + } + } + + private static void GenerateMultiply64HighSI(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(1); + + 
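+            // The one-operand form of imul computes RDX:RAX = RAX * source, leaving the
+            // high 64 bits of the product in RDX; the operands are presumably fixed to
+            // those registers during pre-allocation.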
Debug.Assert(source.Type == OperandType.I64); + + context.Assembler.Imul(source); + } + + private static void GenerateMultiply64HighUI(CodeGenContext context, Operation operation) + { + Operand source = operation.GetSource(1); + + Debug.Assert(source.Type == OperandType.I64); + + context.Assembler.Mul(source); + } + + private static void GenerateNegate(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + ValidateUnOp(dest, source); + + Debug.Assert(dest.Type.IsInteger()); + + context.Assembler.Neg(dest); + } + + private static void GenerateReturn(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Return(); + } + + private static void GenerateRotateRight(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Ror(dest, src2, dest.Type); + } + + private static void GenerateShiftLeft(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Shl(dest, src2, dest.Type); + } + + private static void GenerateShiftRightSI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Sar(dest, src2, dest.Type); + } + + private static void GenerateShiftRightUI(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateShift(dest, src1, src2); + + context.Assembler.Shr(dest, src2, dest.Type); + } + + private static void GenerateSignExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx16(dest, source, dest.Type); + } + + private static void GenerateSignExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx32(dest, source, dest.Type); + } + + private static void GenerateSignExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movsx8(dest, source, dest.Type); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, context.CallArgsRegionSize); + } + + private static void GenerateSpillArg(CodeGenContext context, Operation operation) + { + GenerateSpill(context, operation, 0); + } + + private static void GenerateSpill(CodeGenContext context, Operation operation, int baseOffset) + { + Operand offset = operation.GetSource(0); + Operand source = operation.GetSource(1); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + baseOffset; + + Operand rsp = Register(X86Register.Rsp); + + Operand memOp = MemoryOp(source.Type, 
rsp, default, Multiplier.x1, offs); + + GenerateStore(context, memOp, source); + } + + private static void GenerateStackAlloc(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand offset = operation.GetSource(0); + + Debug.Assert(offset.Kind == OperandKind.Constant); + + int offs = offset.AsInt32() + context.CallArgsRegionSize; + + Operand rsp = Register(X86Register.Rsp); + + Operand memOp = MemoryOp(OperandType.I64, rsp, default, Multiplier.x1, offs); + + context.Assembler.Lea(dest, memOp, OperandType.I64); + } + + private static void GenerateStore(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + GenerateStore(context, address, value); + } + + private static void GenerateStore16(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Mov16(address, value); + } + + private static void GenerateStore8(CodeGenContext context, Operation operation) + { + Operand value = operation.GetSource(1); + Operand address = Memory(operation.GetSource(0), value.Type); + + Debug.Assert(value.Type.IsInteger()); + + context.Assembler.Mov8(address, value); + } + + private static void GenerateSubtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + ValidateBinOp(dest, src1, src2); + + if (dest.Type.IsInteger()) + { + context.Assembler.Sub(dest, src2, dest.Type); + } + else if (dest.Type == OperandType.FP32) + { + context.Assembler.Subss(dest, src1, src2); + } + else /* if (dest.Type == OperandType.FP64) */ + { + context.Assembler.Subsd(dest, src1, src2); + } + } + + private static void GenerateTailcall(CodeGenContext context, Operation operation) + { + WriteEpilogue(context); + + context.Assembler.Jmp(operation.GetSource(0)); + } + + private static void GenerateVectorCreateScalar(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(!dest.Type.IsInteger() && source.Type.IsInteger()); + + if (source.Type == OperandType.I32) + { + context.Assembler.Movd(dest, source); // (__m128i _mm_cvtsi32_si128(int a)) + } + else /* if (source.Type == OperandType.I64) */ + { + context.Assembler.Movq(dest, source); // (__m128i _mm_cvtsi64_si128(__int64 a)) + } + } + + private static void GenerateVectorExtract(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < OperandType.V128.GetSizeInBytes() / dest.Type.GetSizeInBytes()); + + if (dest.Type == OperandType.I32) + { + if (index == 0) + { + context.Assembler.Movd(dest, src1); + } + else if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrd(dest, src1, index); + } + else + { + int mask0 = 0b11_10_01_00; + int mask1 = 0b11_10_01_00; + + mask0 = BitUtils.RotateRight(mask0, index * 2, 8); + mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8); + + context.Assembler.Pshufd(src1, src1, (byte)mask0); + context.Assembler.Movd (dest, src1); + 
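+                    // Rotate the lanes back so src1 is left unmodified.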
context.Assembler.Pshufd(src1, src1, (byte)mask1); + } + } + else if (dest.Type == OperandType.I64) + { + if (index == 0) + { + context.Assembler.Movq(dest, src1); + } + else if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrq(dest, src1, index); + } + else + { + const byte mask = 0b01_00_11_10; + + context.Assembler.Pshufd(src1, src1, mask); + context.Assembler.Movq (dest, src1); + context.Assembler.Pshufd(src1, src1, mask); + } + } + else + { + // Floating-point types. + if ((index >= 2 && dest.Type == OperandType.FP32) || + (index == 1 && dest.Type == OperandType.FP64)) + { + context.Assembler.Movhlps(dest, dest, src1); + context.Assembler.Movq (dest, dest); + } + else + { + context.Assembler.Movq(dest, src1); + } + + if (dest.Type == OperandType.FP32) + { + context.Assembler.Pshufd(dest, dest, (byte)(0xfc | (index & 1))); + } + } + } + + private static void GenerateVectorExtract16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 8); + + context.Assembler.Pextrw(dest, src1, index); + } + + private static void GenerateVectorExtract8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; //Value + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Index + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src2.Kind == OperandKind.Constant); + + byte index = src2.AsByte(); + + Debug.Assert(index < 16); + + if (HardwareCapabilities.SupportsSse41) + { + context.Assembler.Pextrb(dest, src1, index); + } + else + { + context.Assembler.Pextrw(dest, src1, (byte)(index >> 1)); + + if ((index & 1) != 0) + { + context.Assembler.Shr(dest, Const(8), OperandType.I32); + } + else + { + context.Assembler.Movzx8(dest, dest, OperandType.I32); + } + } + } + + private static void GenerateVectorInsert(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand src1 = operation.GetSource(0); //Vector + Operand src2 = operation.GetSource(1); //Value + Operand src3 = operation.GetSource(2); //Index + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + void InsertIntSse2(int words) + { + if (dest.GetRegister() != src1.GetRegister()) + { + context.Assembler.Movdqu(dest, src1); + } + + for (int word = 0; word < words; word++) + { + // Insert lower 16-bits. + context.Assembler.Pinsrw(dest, dest, src2, (byte)(index * words + word)); + + // Move next word down. 
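+                    // After `words` rotations by 16, src2 has completed a full rotation
+                    // and is restored to its original value.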
+                    context.Assembler.Ror(src2, Const(16), src2.Type);
+                }
+            }
+
+            if (src2.Type == OperandType.I32)
+            {
+                Debug.Assert(index < 4);
+
+                if (HardwareCapabilities.SupportsSse41)
+                {
+                    context.Assembler.Pinsrd(dest, src1, src2, index);
+                }
+                else
+                {
+                    InsertIntSse2(2);
+                }
+            }
+            else if (src2.Type == OperandType.I64)
+            {
+                Debug.Assert(index < 2);
+
+                if (HardwareCapabilities.SupportsSse41)
+                {
+                    context.Assembler.Pinsrq(dest, src1, src2, index);
+                }
+                else
+                {
+                    InsertIntSse2(4);
+                }
+            }
+            else if (src2.Type == OperandType.FP32)
+            {
+                Debug.Assert(index < 4);
+
+                if (index != 0)
+                {
+                    if (HardwareCapabilities.SupportsSse41)
+                    {
+                        context.Assembler.Insertps(dest, src1, src2, (byte)(index << 4));
+                    }
+                    else
+                    {
+                        if (src1.GetRegister() == src2.GetRegister())
+                        {
+                            int mask = 0b11_10_01_00;
+
+                            mask &= ~(0b11 << index * 2);
+
+                            context.Assembler.Pshufd(dest, src1, (byte)mask);
+                        }
+                        else
+                        {
+                            int mask0 = 0b11_10_01_00;
+                            int mask1 = 0b11_10_01_00;
+
+                            mask0 = BitUtils.RotateRight(mask0, index * 2, 8);
+                            mask1 = BitUtils.RotateRight(mask1, 8 - index * 2, 8);
+
+                            context.Assembler.Pshufd(src1, src1, (byte)mask0); // Lane to be inserted in position 0.
+                            context.Assembler.Movss (dest, src1, src2); // dest[127:0] = src1[127:32] | src2[31:0]
+                            context.Assembler.Pshufd(dest, dest, (byte)mask1); // Inserted lane in original position.
+
+                            if (dest.GetRegister() != src1.GetRegister())
+                            {
+                                context.Assembler.Pshufd(src1, src1, (byte)mask1); // Restore src1.
+                            }
+                        }
+                    }
+                }
+                else
+                {
+                    context.Assembler.Movss(dest, src1, src2);
+                }
+            }
+            else /* if (src2.Type == OperandType.FP64) */
+            {
+                Debug.Assert(index < 2);
+
+                if (index != 0)
+                {
+                    context.Assembler.Movlhps(dest, src1, src2);
+                }
+                else
+                {
+                    context.Assembler.Movsd(dest, src1, src2);
+                }
+            }
+        }
+
+        private static void GenerateVectorInsert16(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); //Vector
+            Operand src2 = operation.GetSource(1); //Value
+            Operand src3 = operation.GetSource(2); //Index
+
+            if (!HardwareCapabilities.SupportsVexEncoding)
+            {
+                EnsureSameReg(dest, src1);
+            }
+
+            Debug.Assert(src1.Type == OperandType.V128);
+            Debug.Assert(src3.Kind == OperandKind.Constant);
+
+            byte index = src3.AsByte();
+
+            context.Assembler.Pinsrw(dest, src1, src2, index);
+        }
+
+        private static void GenerateVectorInsert8(CodeGenContext context, Operation operation)
+        {
+            Operand dest = operation.Destination;
+            Operand src1 = operation.GetSource(0); //Vector
+            Operand src2 = operation.GetSource(1); //Value
+            Operand src3 = operation.GetSource(2); //Index
+
+            // Without SSE 4.1 support it's not possible to emulate this
+            // instruction without the use of a temporary register, so we
+            // instead handle that case in the pre-allocator when SSE 4.1
+            // is not supported on the CPU.
+ Debug.Assert(HardwareCapabilities.SupportsSse41); + + if (!HardwareCapabilities.SupportsVexEncoding) + { + EnsureSameReg(dest, src1); + } + + Debug.Assert(src1.Type == OperandType.V128); + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + context.Assembler.Pinsrb(dest, src1, src2, index); + } + + private static void GenerateVectorOne(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.Pcmpeqw(dest, dest, dest); + } + + private static void GenerateVectorZero(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + + Debug.Assert(!dest.Type.IsInteger()); + + context.Assembler.Xorps(dest, dest, dest); + } + + private static void GenerateVectorZeroUpper64(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + GenerateZeroUpper64(context, dest, source); + } + + private static void GenerateVectorZeroUpper96(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type == OperandType.V128 && source.Type == OperandType.V128); + + GenerateZeroUpper96(context, dest, source); + } + + private static void GenerateZeroExtend16(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movzx16(dest, source, OperandType.I32); + } + + private static void GenerateZeroExtend32(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + // We can eliminate the move if source is already 32-bit and the registers are the same. 
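+            // (On x86-64 every write to a 32-bit register zeroes bits 63:32, so an I32
+            // value in a register should already have a clear upper half and no extra
+            // instruction is needed.)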
+ if (dest.Value == source.Value && source.Type == OperandType.I32) + { + return; + } + + context.Assembler.Mov(dest, source, OperandType.I32); + } + + private static void GenerateZeroExtend8(CodeGenContext context, Operation operation) + { + Operand dest = operation.Destination; + Operand source = operation.GetSource(0); + + Debug.Assert(dest.Type.IsInteger() && source.Type.IsInteger()); + + context.Assembler.Movzx8(dest, source, OperandType.I32); + } + + private static void GenerateLoad(CodeGenContext context, Operand address, Operand value) + { + switch (value.Type) + { + case OperandType.I32: context.Assembler.Mov (value, address, OperandType.I32); break; + case OperandType.I64: context.Assembler.Mov (value, address, OperandType.I64); break; + case OperandType.FP32: context.Assembler.Movd (value, address); break; + case OperandType.FP64: context.Assembler.Movq (value, address); break; + case OperandType.V128: context.Assembler.Movdqu(value, address); break; + + default: Debug.Assert(false); break; + } + } + + private static void GenerateStore(CodeGenContext context, Operand address, Operand value) + { + switch (value.Type) + { + case OperandType.I32: context.Assembler.Mov (address, value, OperandType.I32); break; + case OperandType.I64: context.Assembler.Mov (address, value, OperandType.I64); break; + case OperandType.FP32: context.Assembler.Movd (address, value); break; + case OperandType.FP64: context.Assembler.Movq (address, value); break; + case OperandType.V128: context.Assembler.Movdqu(address, value); break; + + default: Debug.Assert(false); break; + } + } + + private static void GenerateZeroUpper64(CodeGenContext context, Operand dest, Operand source) + { + context.Assembler.Movq(dest, source); + } + + private static void GenerateZeroUpper96(CodeGenContext context, Operand dest, Operand source) + { + context.Assembler.Movq(dest, source); + context.Assembler.Pshufd(dest, dest, 0xfc); + } + + private static bool MatchOperation(Operation node, Instruction inst, OperandType destType, Register destReg) + { + if (node == default || node.DestinationsCount == 0) + { + return false; + } + + if (node.Instruction != inst) + { + return false; + } + + Operand dest = node.Destination; + + return dest.Kind == OperandKind.Register && + dest.Type == destType && + dest.GetRegister() == destReg; + } + + [Conditional("DEBUG")] + private static void ValidateUnOp(Operand dest, Operand source) + { + EnsureSameReg (dest, source); + EnsureSameType(dest, source); + } + + [Conditional("DEBUG")] + private static void ValidateBinOp(Operand dest, Operand src1, Operand src2) + { + EnsureSameReg (dest, src1); + EnsureSameType(dest, src1, src2); + } + + [Conditional("DEBUG")] + private static void ValidateShift(Operand dest, Operand src1, Operand src2) + { + EnsureSameReg (dest, src1); + EnsureSameType(dest, src1); + + Debug.Assert(dest.Type.IsInteger() && src2.Type == OperandType.I32); + } + + private static void EnsureSameReg(Operand op1, Operand op2) + { + if (!op1.Type.IsInteger() && HardwareCapabilities.SupportsVexEncoding) + { + return; + } + + Debug.Assert(op1.Kind == OperandKind.Register || op1.Kind == OperandKind.Memory); + Debug.Assert(op1.Kind == op2.Kind); + Debug.Assert(op1.Value == op2.Value); + } + + private static void EnsureSameType(Operand op1, Operand op2) + { + Debug.Assert(op1.Type == op2.Type); + } + + private static void EnsureSameType(Operand op1, Operand op2, Operand op3) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + } + + private static void 
EnsureSameType(Operand op1, Operand op2, Operand op3, Operand op4) + { + Debug.Assert(op1.Type == op2.Type); + Debug.Assert(op1.Type == op3.Type); + Debug.Assert(op1.Type == op4.Type); + } + + private static UnwindInfo WritePrologue(CodeGenContext context) + { + List<UnwindPushEntry> pushEntries = new List<UnwindPushEntry>(); + + Operand rsp = Register(X86Register.Rsp); + + int mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + + while (mask != 0) + { + int bit = BitOperations.TrailingZeroCount(mask); + + context.Assembler.Push(Register((X86Register)bit)); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.PushReg, context.StreamOffset, regIndex: bit)); + + mask &= ~(1 << bit); + } + + int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize; + + reservedStackSize += context.XmmSaveRegionSize; + + if (reservedStackSize >= StackGuardSize) + { + GenerateInlineStackProbe(context, reservedStackSize); + } + + if (reservedStackSize != 0) + { + context.Assembler.Sub(rsp, Const(reservedStackSize), OperandType.I64); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.AllocStack, context.StreamOffset, stackOffsetOrAllocSize: reservedStackSize)); + } + + int offset = reservedStackSize; + + mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + while (mask != 0) + { + int bit = BitOperations.TrailingZeroCount(mask); + + offset -= 16; + + Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset); + + context.Assembler.Movdqu(memOp, Xmm((X86Register)bit)); + + pushEntries.Add(new UnwindPushEntry(UnwindPseudoOp.SaveXmm128, context.StreamOffset, bit, offset)); + + mask &= ~(1 << bit); + } + + return new UnwindInfo(pushEntries.ToArray(), context.StreamOffset); + } + + private static void WriteEpilogue(CodeGenContext context) + { + Operand rsp = Register(X86Register.Rsp); + + int reservedStackSize = context.CallArgsRegionSize + context.AllocResult.SpillRegionSize; + + reservedStackSize += context.XmmSaveRegionSize; + + int offset = reservedStackSize; + + int mask = CallingConvention.GetVecCalleeSavedRegisters() & context.AllocResult.VecUsedRegisters; + + while (mask != 0) + { + int bit = BitOperations.TrailingZeroCount(mask); + + offset -= 16; + + Operand memOp = MemoryOp(OperandType.V128, rsp, default, Multiplier.x1, offset); + + context.Assembler.Movdqu(Xmm((X86Register)bit), memOp); + + mask &= ~(1 << bit); + } + + if (reservedStackSize != 0) + { + context.Assembler.Add(rsp, Const(reservedStackSize), OperandType.I64); + } + + mask = CallingConvention.GetIntCalleeSavedRegisters() & context.AllocResult.IntUsedRegisters; + + while (mask != 0) + { + int bit = BitUtils.HighestBitSet(mask); + + context.Assembler.Pop(Register((X86Register)bit)); + + mask &= ~(1 << bit); + } + } + + private static void GenerateInlineStackProbe(CodeGenContext context, int size) + { + // Windows does lazy stack allocation, and there are just 2 + // guard pages on the end of the stack. So, if the allocation + // size we make is greater than this guard size, we must ensure + // that the OS will map all pages that we'll use. We do that by + // doing a dummy read on those pages, forcing a page fault and + // the OS to map them. If they are already mapped, nothing happens. 
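+ // For example, with PageSize = 0x1000 and size = 0x2800, the code below first
+ // rounds size up to 0x3000 and then reads [rsp - 0x1000] and [rsp - 0x2000],
+ // touching each new page in order so the guard page mechanism can commit them.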
+ const int pageMask = PageSize - 1; + + size = (size + pageMask) & ~pageMask; + + Operand rsp = Register(X86Register.Rsp); + Operand temp = Register(CallingConvention.GetIntReturnRegister()); + + for (int offset = PageSize; offset < size; offset += PageSize) + { + Operand memOp = MemoryOp(OperandType.I32, rsp, default, Multiplier.x1, -offset); + + context.Assembler.Mov(temp, memOp, OperandType.I32); + } + } + + private static Operand Memory(Operand operand, OperandType type) + { + if (operand.Kind == OperandKind.Memory) + { + return operand; + } + + return MemoryOp(type, operand); + } + + private static Operand Register(X86Register register, OperandType type = OperandType.I64) + { + return Operand.Factory.Register((int)register, RegisterType.Integer, type); + } + + private static Operand Xmm(X86Register register) + { + return Operand.Factory.Register((int)register, RegisterType.Vector, OperandType.V128); + } + } +}
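A minimal standalone sketch of the register-mask walk used by WritePrologue and WriteEpilogue above (an illustration, not part of this patch; the register numbers are invented): pushes run from the lowest set bit up, pops from the highest set bit down, so callee-saved registers are restored in exactly the reverse order they were saved.

using System;
using System.Numerics;

class CalleeSavedMaskDemo
{
    static void Main()
    {
        const int savedMask = 0b1010_1000; // pretend registers 3, 5 and 7 were used

        // Prologue order: lowest register first; mask &= mask - 1 clears the lowest set bit.
        for (int mask = savedMask; mask != 0; mask &= mask - 1)
        {
            Console.WriteLine($"push r{BitOperations.TrailingZeroCount(mask)}");
        }

        // Epilogue order: highest register first, mirroring BitUtils.HighestBitSet.
        for (int mask = savedMask; mask != 0;)
        {
            int bit = 31 - BitOperations.LeadingZeroCount((uint)mask);

            Console.WriteLine($"pop r{bit}");

            mask &= ~(1 << bit);
        }
    }
}

Running it prints push r3, push r5, push r7, then pop r7, pop r5, pop r3.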
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs b/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs new file mode 100644 index 00000000..07cdcd09 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/HardwareCapabilities.cs @@ -0,0 +1,144 @@ +using Ryujinx.Memory; +using System; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; + +namespace ARMeilleure.CodeGen.X86 +{ + static class HardwareCapabilities + { + private delegate uint GetXcr0(); + + static HardwareCapabilities() + { + if (!X86Base.IsSupported) + { + return; + } + + (int maxNum, _, _, _) = X86Base.CpuId(0x00000000, 0x00000000); + + (_, _, int ecx1, int edx1) = X86Base.CpuId(0x00000001, 0x00000000); + FeatureInfo1Edx = (FeatureFlags1Edx)edx1; + FeatureInfo1Ecx = (FeatureFlags1Ecx)ecx1; + + if (maxNum >= 7) + { + (_, int ebx7, int ecx7, _) = X86Base.CpuId(0x00000007, 0x00000000); + FeatureInfo7Ebx = (FeatureFlags7Ebx)ebx7; + FeatureInfo7Ecx = (FeatureFlags7Ecx)ecx7; + } + + Xcr0InfoEax = (Xcr0FlagsEax)GetXcr0Eax(); + } + + private static uint GetXcr0Eax() + { + if (!FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave)) + { + // XSAVE feature required for xgetbv + return 0; + } + + ReadOnlySpan<byte> asmGetXcr0 = new byte[] + { + 0x31, 0xc9, // xor ecx, ecx + 0xf, 0x01, 0xd0, // xgetbv + 0xc3, // ret + }; + + using MemoryBlock memGetXcr0 = new MemoryBlock((ulong)asmGetXcr0.Length); + + memGetXcr0.Write(0, asmGetXcr0); + + memGetXcr0.Reprotect(0, (ulong)asmGetXcr0.Length, MemoryPermission.ReadAndExecute); + + var fGetXcr0 = Marshal.GetDelegateForFunctionPointer<GetXcr0>(memGetXcr0.Pointer); + + return fGetXcr0(); + } + + [Flags] + public enum FeatureFlags1Edx + { + Sse = 1 << 25, + Sse2 = 1 << 26 + } + + [Flags] + public enum FeatureFlags1Ecx + { + Sse3 = 1 << 0, + Pclmulqdq = 1 << 1, + Ssse3 = 1 << 9, + Fma = 1 << 12, + Sse41 = 1 << 19, + Sse42 = 1 << 20, + Popcnt = 1 << 23, + Aes = 1 << 25, + Xsave = 1 << 26, + Osxsave = 1 << 27, + Avx = 1 << 28, + F16c = 1 << 29 + } + + [Flags] + public enum FeatureFlags7Ebx + { + Avx2 = 1 << 5, + Avx512f = 1 << 16, + Avx512dq = 1 << 17, + Sha = 1 << 29, + Avx512bw = 1 << 30, + Avx512vl = 1 << 31 + } + + [Flags] + public enum FeatureFlags7Ecx + { + Gfni = 1 << 8, + } + + [Flags] + public enum Xcr0FlagsEax + { + Sse = 1 << 1, + YmmHi128 = 1 << 2, + Opmask = 1 << 5, + ZmmHi256 = 1 << 6, + Hi16Zmm = 1 << 7 + } + + public static FeatureFlags1Edx FeatureInfo1Edx { get; } + public static FeatureFlags1Ecx FeatureInfo1Ecx { get; } + public static FeatureFlags7Ebx FeatureInfo7Ebx { get; } = 0; + public static FeatureFlags7Ecx FeatureInfo7Ecx { get; } = 0; + public static Xcr0FlagsEax Xcr0InfoEax { get; } = 0; + + public static bool SupportsSse => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse); + public static bool SupportsSse2 => FeatureInfo1Edx.HasFlag(FeatureFlags1Edx.Sse2); + public static bool SupportsSse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse3); + public static bool SupportsPclmulqdq => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Pclmulqdq); + public static bool SupportsSsse3 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Ssse3); + public static bool SupportsFma => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Fma); + public static bool SupportsSse41 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse41); + public static bool SupportsSse42 => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Sse42); + public static bool SupportsPopcnt => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Popcnt); + public static bool SupportsAesni => 
FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Aes); + public static bool SupportsAvx => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Avx | FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave) && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128); + public static bool SupportsAvx2 => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx2) && SupportsAvx; + public static bool SupportsAvx512F => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512f) && FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.Xsave | FeatureFlags1Ecx.Osxsave) + && Xcr0InfoEax.HasFlag(Xcr0FlagsEax.Sse | Xcr0FlagsEax.YmmHi128 | Xcr0FlagsEax.Opmask | Xcr0FlagsEax.ZmmHi256 | Xcr0FlagsEax.Hi16Zmm); + public static bool SupportsAvx512Vl => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512vl) && SupportsAvx512F; + public static bool SupportsAvx512Bw => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512bw) && SupportsAvx512F; + public static bool SupportsAvx512Dq => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Avx512dq) && SupportsAvx512F; + public static bool SupportsF16c => FeatureInfo1Ecx.HasFlag(FeatureFlags1Ecx.F16c); + public static bool SupportsSha => FeatureInfo7Ebx.HasFlag(FeatureFlags7Ebx.Sha); + public static bool SupportsGfni => FeatureInfo7Ecx.HasFlag(FeatureFlags7Ecx.Gfni); + + public static bool ForceLegacySse { get; set; } + + public static bool SupportsVexEncoding => SupportsAvx && !ForceLegacySse; + public static bool SupportsEvexEncoding => SupportsAvx512F && !ForceLegacySse; + } +}
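One subtlety the capability properties above rely on: Enum.HasFlag with a combined mask only returns true when every bit of the argument is set, which is exactly what lets SupportsAvx check Avx, Xsave and Osxsave in one call. A small self-contained sketch (invented enum, for illustration only):

using System;

[Flags]
enum Caps
{
    Avx = 1 << 0,
    Xsave = 1 << 1,
    Osxsave = 1 << 2,
}

class HasFlagDemo
{
    static void Main()
    {
        Caps detected = Caps.Avx | Caps.Xsave;

        // True: both requested bits are present.
        Console.WriteLine(detected.HasFlag(Caps.Avx | Caps.Xsave));

        // False: Osxsave is missing, so the combined check fails as a whole.
        Console.WriteLine(detected.HasFlag(Caps.Avx | Caps.Xsave | Caps.Osxsave));
    }
}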
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs new file mode 100644 index 00000000..302bf4d3 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/IntrinsicInfo.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.CodeGen.X86 +{ + readonly struct IntrinsicInfo + { + public X86Instruction Inst { get; } + public IntrinsicType Type { get; } + + public IntrinsicInfo(X86Instruction inst, IntrinsicType type) + { + Inst = inst; + Type = type; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs new file mode 100644 index 00000000..e3d94b7a --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/IntrinsicTable.cs @@ -0,0 +1,200 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.CodeGen.X86 +{ + static class IntrinsicTable + { + private static IntrinsicInfo[] _intrinTable; + + static IntrinsicTable() + { + _intrinTable = new IntrinsicInfo[EnumUtils.GetCount(typeof(Intrinsic))]; + + Add(Intrinsic.X86Addpd, new IntrinsicInfo(X86Instruction.Addpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Addps, new IntrinsicInfo(X86Instruction.Addps, IntrinsicType.Binary)); + Add(Intrinsic.X86Addsd, new IntrinsicInfo(X86Instruction.Addsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Addss, new IntrinsicInfo(X86Instruction.Addss, IntrinsicType.Binary)); + Add(Intrinsic.X86Aesdec, new IntrinsicInfo(X86Instruction.Aesdec, IntrinsicType.Binary)); + Add(Intrinsic.X86Aesdeclast, new IntrinsicInfo(X86Instruction.Aesdeclast, IntrinsicType.Binary)); + Add(Intrinsic.X86Aesenc, new IntrinsicInfo(X86Instruction.Aesenc, IntrinsicType.Binary)); + Add(Intrinsic.X86Aesenclast, new IntrinsicInfo(X86Instruction.Aesenclast, IntrinsicType.Binary)); + Add(Intrinsic.X86Aesimc, new IntrinsicInfo(X86Instruction.Aesimc, IntrinsicType.Unary)); + Add(Intrinsic.X86Andnpd, new IntrinsicInfo(X86Instruction.Andnpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Andnps, new IntrinsicInfo(X86Instruction.Andnps, IntrinsicType.Binary)); + Add(Intrinsic.X86Andpd, new IntrinsicInfo(X86Instruction.Andpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Andps, new IntrinsicInfo(X86Instruction.Andps, IntrinsicType.Binary)); + Add(Intrinsic.X86Blendvpd, new IntrinsicInfo(X86Instruction.Blendvpd, IntrinsicType.Ternary)); + Add(Intrinsic.X86Blendvps, new IntrinsicInfo(X86Instruction.Blendvps, IntrinsicType.Ternary)); + Add(Intrinsic.X86Cmppd, new IntrinsicInfo(X86Instruction.Cmppd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpps, new IntrinsicInfo(X86Instruction.Cmpps, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpsd, new IntrinsicInfo(X86Instruction.Cmpsd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Cmpss, new IntrinsicInfo(X86Instruction.Cmpss, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Comisdeq, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisdge, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisdlt, new IntrinsicInfo(X86Instruction.Comisd, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisseq, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comissge, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Comisslt, new IntrinsicInfo(X86Instruction.Comiss, IntrinsicType.Comis_)); + Add(Intrinsic.X86Crc32, new IntrinsicInfo(X86Instruction.Crc32, IntrinsicType.Crc32)); + Add(Intrinsic.X86Crc32_16, new IntrinsicInfo(X86Instruction.Crc32_16, IntrinsicType.Crc32)); + Add(Intrinsic.X86Crc32_8, new IntrinsicInfo(X86Instruction.Crc32_8, IntrinsicType.Crc32)); + Add(Intrinsic.X86Cvtdq2pd, new IntrinsicInfo(X86Instruction.Cvtdq2pd, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtdq2ps, new IntrinsicInfo(X86Instruction.Cvtdq2ps, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtpd2dq, new IntrinsicInfo(X86Instruction.Cvtpd2dq, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtpd2ps, new IntrinsicInfo(X86Instruction.Cvtpd2ps, 
IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtps2dq, new IntrinsicInfo(X86Instruction.Cvtps2dq, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtps2pd, new IntrinsicInfo(X86Instruction.Cvtps2pd, IntrinsicType.Unary)); + Add(Intrinsic.X86Cvtsd2si, new IntrinsicInfo(X86Instruction.Cvtsd2si, IntrinsicType.UnaryToGpr)); + Add(Intrinsic.X86Cvtsd2ss, new IntrinsicInfo(X86Instruction.Cvtsd2ss, IntrinsicType.Binary)); + Add(Intrinsic.X86Cvtsi2sd, new IntrinsicInfo(X86Instruction.Cvtsi2sd, IntrinsicType.BinaryGpr)); + Add(Intrinsic.X86Cvtsi2si, new IntrinsicInfo(X86Instruction.Movd, IntrinsicType.UnaryToGpr)); + Add(Intrinsic.X86Cvtsi2ss, new IntrinsicInfo(X86Instruction.Cvtsi2ss, IntrinsicType.BinaryGpr)); + Add(Intrinsic.X86Cvtss2sd, new IntrinsicInfo(X86Instruction.Cvtss2sd, IntrinsicType.Binary)); + Add(Intrinsic.X86Cvtss2si, new IntrinsicInfo(X86Instruction.Cvtss2si, IntrinsicType.UnaryToGpr)); + Add(Intrinsic.X86Divpd, new IntrinsicInfo(X86Instruction.Divpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Divps, new IntrinsicInfo(X86Instruction.Divps, IntrinsicType.Binary)); + Add(Intrinsic.X86Divsd, new IntrinsicInfo(X86Instruction.Divsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Divss, new IntrinsicInfo(X86Instruction.Divss, IntrinsicType.Binary)); + Add(Intrinsic.X86Gf2p8affineqb, new IntrinsicInfo(X86Instruction.Gf2p8affineqb, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Haddpd, new IntrinsicInfo(X86Instruction.Haddpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Haddps, new IntrinsicInfo(X86Instruction.Haddps, IntrinsicType.Binary)); + Add(Intrinsic.X86Insertps, new IntrinsicInfo(X86Instruction.Insertps, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Ldmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); + Add(Intrinsic.X86Maxpd, new IntrinsicInfo(X86Instruction.Maxpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxps, new IntrinsicInfo(X86Instruction.Maxps, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxsd, new IntrinsicInfo(X86Instruction.Maxsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Maxss, new IntrinsicInfo(X86Instruction.Maxss, IntrinsicType.Binary)); + Add(Intrinsic.X86Minpd, new IntrinsicInfo(X86Instruction.Minpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Minps, new IntrinsicInfo(X86Instruction.Minps, IntrinsicType.Binary)); + Add(Intrinsic.X86Minsd, new IntrinsicInfo(X86Instruction.Minsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Minss, new IntrinsicInfo(X86Instruction.Minss, IntrinsicType.Binary)); + Add(Intrinsic.X86Movhlps, new IntrinsicInfo(X86Instruction.Movhlps, IntrinsicType.Binary)); + Add(Intrinsic.X86Movlhps, new IntrinsicInfo(X86Instruction.Movlhps, IntrinsicType.Binary)); + Add(Intrinsic.X86Movss, new IntrinsicInfo(X86Instruction.Movss, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulpd, new IntrinsicInfo(X86Instruction.Mulpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulps, new IntrinsicInfo(X86Instruction.Mulps, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulsd, new IntrinsicInfo(X86Instruction.Mulsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Mulss, new IntrinsicInfo(X86Instruction.Mulss, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddb, new IntrinsicInfo(X86Instruction.Paddb, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddd, new IntrinsicInfo(X86Instruction.Paddd, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddq, new IntrinsicInfo(X86Instruction.Paddq, IntrinsicType.Binary)); + Add(Intrinsic.X86Paddw, new IntrinsicInfo(X86Instruction.Paddw, IntrinsicType.Binary)); + Add(Intrinsic.X86Palignr, new IntrinsicInfo(X86Instruction.Palignr, 
IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Pand, new IntrinsicInfo(X86Instruction.Pand, IntrinsicType.Binary)); + Add(Intrinsic.X86Pandn, new IntrinsicInfo(X86Instruction.Pandn, IntrinsicType.Binary)); + Add(Intrinsic.X86Pavgb, new IntrinsicInfo(X86Instruction.Pavgb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pavgw, new IntrinsicInfo(X86Instruction.Pavgw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pblendvb, new IntrinsicInfo(X86Instruction.Pblendvb, IntrinsicType.Ternary)); + Add(Intrinsic.X86Pclmulqdq, new IntrinsicInfo(X86Instruction.Pclmulqdq, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Pcmpeqb, new IntrinsicInfo(X86Instruction.Pcmpeqb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqd, new IntrinsicInfo(X86Instruction.Pcmpeqd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqq, new IntrinsicInfo(X86Instruction.Pcmpeqq, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpeqw, new IntrinsicInfo(X86Instruction.Pcmpeqw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtb, new IntrinsicInfo(X86Instruction.Pcmpgtb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtd, new IntrinsicInfo(X86Instruction.Pcmpgtd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtq, new IntrinsicInfo(X86Instruction.Pcmpgtq, IntrinsicType.Binary)); + Add(Intrinsic.X86Pcmpgtw, new IntrinsicInfo(X86Instruction.Pcmpgtw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsb, new IntrinsicInfo(X86Instruction.Pmaxsb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsd, new IntrinsicInfo(X86Instruction.Pmaxsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxsw, new IntrinsicInfo(X86Instruction.Pmaxsw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxub, new IntrinsicInfo(X86Instruction.Pmaxub, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxud, new IntrinsicInfo(X86Instruction.Pmaxud, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmaxuw, new IntrinsicInfo(X86Instruction.Pmaxuw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsb, new IntrinsicInfo(X86Instruction.Pminsb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsd, new IntrinsicInfo(X86Instruction.Pminsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminsw, new IntrinsicInfo(X86Instruction.Pminsw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminub, new IntrinsicInfo(X86Instruction.Pminub, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminud, new IntrinsicInfo(X86Instruction.Pminud, IntrinsicType.Binary)); + Add(Intrinsic.X86Pminuw, new IntrinsicInfo(X86Instruction.Pminuw, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmovsxbw, new IntrinsicInfo(X86Instruction.Pmovsxbw, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovsxdq, new IntrinsicInfo(X86Instruction.Pmovsxdq, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovsxwd, new IntrinsicInfo(X86Instruction.Pmovsxwd, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxbw, new IntrinsicInfo(X86Instruction.Pmovzxbw, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxdq, new IntrinsicInfo(X86Instruction.Pmovzxdq, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmovzxwd, new IntrinsicInfo(X86Instruction.Pmovzxwd, IntrinsicType.Unary)); + Add(Intrinsic.X86Pmulld, new IntrinsicInfo(X86Instruction.Pmulld, IntrinsicType.Binary)); + Add(Intrinsic.X86Pmullw, new IntrinsicInfo(X86Instruction.Pmullw, IntrinsicType.Binary)); + Add(Intrinsic.X86Popcnt, new IntrinsicInfo(X86Instruction.Popcnt, IntrinsicType.PopCount)); + Add(Intrinsic.X86Por, new IntrinsicInfo(X86Instruction.Por, IntrinsicType.Binary)); + Add(Intrinsic.X86Pshufb, new IntrinsicInfo(X86Instruction.Pshufb, IntrinsicType.Binary)); + Add(Intrinsic.X86Pshufd, new IntrinsicInfo(X86Instruction.Pshufd, 
IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Pslld, new IntrinsicInfo(X86Instruction.Pslld, IntrinsicType.Binary)); + Add(Intrinsic.X86Pslldq, new IntrinsicInfo(X86Instruction.Pslldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psllq, new IntrinsicInfo(X86Instruction.Psllq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psllw, new IntrinsicInfo(X86Instruction.Psllw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrad, new IntrinsicInfo(X86Instruction.Psrad, IntrinsicType.Binary)); + Add(Intrinsic.X86Psraw, new IntrinsicInfo(X86Instruction.Psraw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrld, new IntrinsicInfo(X86Instruction.Psrld, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrlq, new IntrinsicInfo(X86Instruction.Psrlq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrldq, new IntrinsicInfo(X86Instruction.Psrldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psrlw, new IntrinsicInfo(X86Instruction.Psrlw, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubb, new IntrinsicInfo(X86Instruction.Psubb, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubd, new IntrinsicInfo(X86Instruction.Psubd, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubq, new IntrinsicInfo(X86Instruction.Psubq, IntrinsicType.Binary)); + Add(Intrinsic.X86Psubw, new IntrinsicInfo(X86Instruction.Psubw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhbw, new IntrinsicInfo(X86Instruction.Punpckhbw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhdq, new IntrinsicInfo(X86Instruction.Punpckhdq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhqdq, new IntrinsicInfo(X86Instruction.Punpckhqdq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckhwd, new IntrinsicInfo(X86Instruction.Punpckhwd, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpcklbw, new IntrinsicInfo(X86Instruction.Punpcklbw, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpckldq, new IntrinsicInfo(X86Instruction.Punpckldq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpcklqdq, new IntrinsicInfo(X86Instruction.Punpcklqdq, IntrinsicType.Binary)); + Add(Intrinsic.X86Punpcklwd, new IntrinsicInfo(X86Instruction.Punpcklwd, IntrinsicType.Binary)); + Add(Intrinsic.X86Pxor, new IntrinsicInfo(X86Instruction.Pxor, IntrinsicType.Binary)); + Add(Intrinsic.X86Rcpps, new IntrinsicInfo(X86Instruction.Rcpps, IntrinsicType.Unary)); + Add(Intrinsic.X86Rcpss, new IntrinsicInfo(X86Instruction.Rcpss, IntrinsicType.Unary)); + Add(Intrinsic.X86Roundpd, new IntrinsicInfo(X86Instruction.Roundpd, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundps, new IntrinsicInfo(X86Instruction.Roundps, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundsd, new IntrinsicInfo(X86Instruction.Roundsd, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Roundss, new IntrinsicInfo(X86Instruction.Roundss, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Rsqrtps, new IntrinsicInfo(X86Instruction.Rsqrtps, IntrinsicType.Unary)); + Add(Intrinsic.X86Rsqrtss, new IntrinsicInfo(X86Instruction.Rsqrtss, IntrinsicType.Unary)); + Add(Intrinsic.X86Sha256Msg1, new IntrinsicInfo(X86Instruction.Sha256Msg1, IntrinsicType.Binary)); + Add(Intrinsic.X86Sha256Msg2, new IntrinsicInfo(X86Instruction.Sha256Msg2, IntrinsicType.Binary)); + Add(Intrinsic.X86Sha256Rnds2, new IntrinsicInfo(X86Instruction.Sha256Rnds2, IntrinsicType.Ternary)); + Add(Intrinsic.X86Shufpd, new IntrinsicInfo(X86Instruction.Shufpd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Shufps, new IntrinsicInfo(X86Instruction.Shufps, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Sqrtpd, new IntrinsicInfo(X86Instruction.Sqrtpd, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtps, new 
IntrinsicInfo(X86Instruction.Sqrtps, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtsd, new IntrinsicInfo(X86Instruction.Sqrtsd, IntrinsicType.Unary)); + Add(Intrinsic.X86Sqrtss, new IntrinsicInfo(X86Instruction.Sqrtss, IntrinsicType.Unary)); + Add(Intrinsic.X86Stmxcsr, new IntrinsicInfo(X86Instruction.None, IntrinsicType.Mxcsr)); + Add(Intrinsic.X86Subpd, new IntrinsicInfo(X86Instruction.Subpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Subps, new IntrinsicInfo(X86Instruction.Subps, IntrinsicType.Binary)); + Add(Intrinsic.X86Subsd, new IntrinsicInfo(X86Instruction.Subsd, IntrinsicType.Binary)); + Add(Intrinsic.X86Subss, new IntrinsicInfo(X86Instruction.Subss, IntrinsicType.Binary)); + Add(Intrinsic.X86Unpckhpd, new IntrinsicInfo(X86Instruction.Unpckhpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Unpckhps, new IntrinsicInfo(X86Instruction.Unpckhps, IntrinsicType.Binary)); + Add(Intrinsic.X86Unpcklpd, new IntrinsicInfo(X86Instruction.Unpcklpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Unpcklps, new IntrinsicInfo(X86Instruction.Unpcklps, IntrinsicType.Binary)); + Add(Intrinsic.X86Vcvtph2ps, new IntrinsicInfo(X86Instruction.Vcvtph2ps, IntrinsicType.Unary)); + Add(Intrinsic.X86Vcvtps2ph, new IntrinsicInfo(X86Instruction.Vcvtps2ph, IntrinsicType.BinaryImm)); + Add(Intrinsic.X86Vfmadd231pd, new IntrinsicInfo(X86Instruction.Vfmadd231pd, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfmadd231ps, new IntrinsicInfo(X86Instruction.Vfmadd231ps, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfmadd231sd, new IntrinsicInfo(X86Instruction.Vfmadd231sd, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfmadd231ss, new IntrinsicInfo(X86Instruction.Vfmadd231ss, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfmsub231sd, new IntrinsicInfo(X86Instruction.Vfmsub231sd, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfmsub231ss, new IntrinsicInfo(X86Instruction.Vfmsub231ss, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfnmadd231pd, new IntrinsicInfo(X86Instruction.Vfnmadd231pd, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfnmadd231ps, new IntrinsicInfo(X86Instruction.Vfnmadd231ps, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfnmadd231sd, new IntrinsicInfo(X86Instruction.Vfnmadd231sd, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfnmadd231ss, new IntrinsicInfo(X86Instruction.Vfnmadd231ss, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfnmsub231sd, new IntrinsicInfo(X86Instruction.Vfnmsub231sd, IntrinsicType.Fma)); + Add(Intrinsic.X86Vfnmsub231ss, new IntrinsicInfo(X86Instruction.Vfnmsub231ss, IntrinsicType.Fma)); + Add(Intrinsic.X86Vpternlogd, new IntrinsicInfo(X86Instruction.Vpternlogd, IntrinsicType.TernaryImm)); + Add(Intrinsic.X86Xorpd, new IntrinsicInfo(X86Instruction.Xorpd, IntrinsicType.Binary)); + Add(Intrinsic.X86Xorps, new IntrinsicInfo(X86Instruction.Xorps, IntrinsicType.Binary)); + } + + private static void Add(Intrinsic intrin, IntrinsicInfo info) + { + _intrinTable[(int)intrin] = info; + } + + public static IntrinsicInfo GetInfo(Intrinsic intrin) + { + return _intrinTable[(int)intrin]; + } + } +}
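The table above is the classic enum-indexed array pattern: one flat array sized to the enum (via EnumUtils.GetCount), filled once in a static constructor, giving allocation-free constant-time lookups such as GetInfo(Intrinsic.X86Addss) returning (Addss, Binary). A reduced sketch of the same idea (names invented for illustration; the real table sizes the array from the enum rather than hardcoding it):

using System;

enum MiniIntrinsic { Addss, Mulss, Sqrtss }

readonly struct MiniInfo
{
    public string Mnemonic { get; }
    public int SourceCount { get; }

    public MiniInfo(string mnemonic, int sourceCount)
    {
        Mnemonic = mnemonic;
        SourceCount = sourceCount;
    }
}

static class MiniTable
{
    private static readonly MiniInfo[] _table = new MiniInfo[3];

    static MiniTable()
    {
        _table[(int)MiniIntrinsic.Addss] = new MiniInfo("addss", 2);
        _table[(int)MiniIntrinsic.Mulss] = new MiniInfo("mulss", 2);
        _table[(int)MiniIntrinsic.Sqrtss] = new MiniInfo("sqrtss", 1);
    }

    public static MiniInfo GetInfo(MiniIntrinsic intrin) => _table[(int)intrin];
}

class Program
{
    static void Main()
    {
        MiniInfo info = MiniTable.GetInfo(MiniIntrinsic.Sqrtss);

        // Prints "sqrtss takes 1 source(s)".
        Console.WriteLine($"{info.Mnemonic} takes {info.SourceCount} source(s)");
    }
}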
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs b/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs new file mode 100644 index 00000000..5a9c14af --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/IntrinsicType.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.CodeGen.X86 +{ + enum IntrinsicType + { + Comis_, + Mxcsr, + PopCount, + Unary, + UnaryToGpr, + Binary, + BinaryGpr, + BinaryImm, + Crc32, + Ternary, + TernaryImm, + Fma + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/Mxcsr.cs b/src/ARMeilleure/CodeGen/X86/Mxcsr.cs new file mode 100644 index 00000000..c61eac31 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/Mxcsr.cs @@ -0,0 +1,15 @@ +using System; + +namespace ARMeilleure.CodeGen.X86 +{ + [Flags] + enum Mxcsr + { + Ftz = 1 << 15, // Flush To Zero. + Rhi = 1 << 14, // Round Mode high bit. + Rlo = 1 << 13, // Round Mode low bit. + Um = 1 << 11, // Underflow Mask. + Dm = 1 << 8, // Denormal Mask. + Daz = 1 << 6 // Denormals Are Zero. + } +} diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocator.cs b/src/ARMeilleure/CodeGen/X86/PreAllocator.cs new file mode 100644 index 00000000..cb742d67 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/PreAllocator.cs @@ -0,0 +1,796 @@ +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.X86 +{ + class PreAllocator + { + public static void RunPass(CompilerContext cctx, StackAllocator stackAlloc, out int maxCallArgs) + { + maxCallArgs = -1; + + Span<Operation> buffer = default; + + CallConvName callConv = CallingConvention.GetCurrentCallConv(); + + Operand[] preservedArgs = new Operand[CallingConvention.GetArgumentsOnRegsCount()]; + + for (BasicBlock block = cctx.Cfg.Blocks.First; block != null; block = block.ListNext) + { + Operation nextNode; + + for (Operation node = block.Operations.First; node != default; node = nextNode) + { + nextNode = node.ListNext; + + if (node.Instruction == Instruction.Phi) + { + continue; + } + + InsertConstantRegCopies(block.Operations, node); + InsertDestructiveRegCopies(block.Operations, node); + InsertConstrainedRegCopies(block.Operations, node); + + switch (node.Instruction) + { + case Instruction.Call: + // Get the maximum number of arguments used on a call. + // On windows, when a struct is returned from the call, + // we also need to pass the pointer where the struct + // should be written on the first argument. + int argsCount = node.SourcesCount - 1; + + if (node.Destination != default && node.Destination.Type == OperandType.V128) + { + argsCount++; + } + + if (maxCallArgs < argsCount) + { + maxCallArgs = argsCount; + } + + // Copy values to registers expected by the function + // being called, as mandated by the ABI. 
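+ // For reference: System V passes the first integer arguments in rdi, rsi,
+ // rdx, rcx, r8 and r9 (vectors in xmm0-xmm7), while Windows x64 uses rcx,
+ // rdx, r8 and r9 (vectors sharing the same four positional slots).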
+ if (callConv == CallConvName.Windows) + { + PreAllocatorWindows.InsertCallCopies(block.Operations, stackAlloc, node); + } + else /* if (callConv == CallConvName.SystemV) */ + { + PreAllocatorSystemV.InsertCallCopies(block.Operations, node); + } + break; + + case Instruction.ConvertToFPUI: + GenerateConvertToFPUI(block.Operations, node); + break; + + case Instruction.LoadArgument: + if (callConv == CallConvName.Windows) + { + nextNode = PreAllocatorWindows.InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node); + } + else /* if (callConv == CallConvName.SystemV) */ + { + nextNode = PreAllocatorSystemV.InsertLoadArgumentCopy(cctx, ref buffer, block.Operations, preservedArgs, node); + } + break; + + case Instruction.Negate: + if (!node.GetSource(0).Type.IsInteger()) + { + GenerateNegate(block.Operations, node); + } + break; + + case Instruction.Return: + if (callConv == CallConvName.Windows) + { + PreAllocatorWindows.InsertReturnCopy(cctx, block.Operations, preservedArgs, node); + } + else /* if (callConv == CallConvName.SystemV) */ + { + PreAllocatorSystemV.InsertReturnCopy(block.Operations, node); + } + break; + + case Instruction.Tailcall: + if (callConv == CallConvName.Windows) + { + PreAllocatorWindows.InsertTailcallCopies(block.Operations, stackAlloc, node); + } + else + { + PreAllocatorSystemV.InsertTailcallCopies(block.Operations, stackAlloc, node); + } + break; + + case Instruction.VectorInsert8: + if (!HardwareCapabilities.SupportsSse41) + { + GenerateVectorInsert8(block.Operations, node); + } + break; + + case Instruction.Extended: + if (node.Intrinsic == Intrinsic.X86Ldmxcsr) + { + int stackOffset = stackAlloc.Allocate(OperandType.I32); + + node.SetSources(new Operand[] { Const(stackOffset), node.GetSource(0) }); + } + else if (node.Intrinsic == Intrinsic.X86Stmxcsr) + { + int stackOffset = stackAlloc.Allocate(OperandType.I32); + + node.SetSources(new Operand[] { Const(stackOffset) }); + } + break; + } + } + } + } + + protected static void InsertConstantRegCopies(IntrusiveList<Operation> nodes, Operation node) + { + if (node.SourcesCount == 0 || IsXmmIntrinsic(node)) + { + return; + } + + Instruction inst = node.Instruction; + + Operand src1 = node.GetSource(0); + Operand src2; + + if (src1.Kind == OperandKind.Constant) + { + if (!src1.Type.IsInteger()) + { + // Handle non-integer types (FP32, FP64 and V128). + // For instructions without an immediate operand, we do the following: + // - Insert a copy with the constant value (as integer) to a GPR. + // - Insert a copy from the GPR to a XMM register. + // - Replace the constant use with the XMM register. + src1 = AddXmmCopy(nodes, node, src1); + + node.SetSource(0, src1); + } + else if (!HasConstSrc1(inst)) + { + // Handle integer types. + // Most ALU instructions accepts a 32-bits immediate on the second operand. + // We need to ensure the following: + // - If the constant is on operand 1, we need to move it. + // -- But first, we try to swap operand 1 and 2 if the instruction is commutative. + // -- Doing so may allow us to encode the constant as operand 2 and avoid a copy. + // - If the constant is on operand 2, we check if the instruction supports it, + // if not, we also add a copy. 64-bits constants are usually not supported. 
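+ // For example, "x = 10 + y" can be swapped to "x = y + 10" so the 10 is
+ // encoded as an imm32, while "x = y + 0x123456789" exceeds 32 bits and
+ // must be materialized into a register with a copy first.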
+ if (IsCommutative(node)) + { + src2 = node.GetSource(1); + + Operand temp = src1; + + src1 = src2; + src2 = temp; + + node.SetSource(0, src1); + node.SetSource(1, src2); + } + + if (src1.Kind == OperandKind.Constant) + { + src1 = AddCopy(nodes, node, src1); + + node.SetSource(0, src1); + } + } + } + + if (node.SourcesCount < 2) + { + return; + } + + src2 = node.GetSource(1); + + if (src2.Kind == OperandKind.Constant) + { + if (!src2.Type.IsInteger()) + { + src2 = AddXmmCopy(nodes, node, src2); + + node.SetSource(1, src2); + } + else if (!HasConstSrc2(inst) || CodeGenCommon.IsLongConst(src2)) + { + src2 = AddCopy(nodes, node, src2); + + node.SetSource(1, src2); + } + } + } + + protected static void InsertConstrainedRegCopies(IntrusiveList<Operation> nodes, Operation node) + { + Operand dest = node.Destination; + + switch (node.Instruction) + { + case Instruction.CompareAndSwap: + case Instruction.CompareAndSwap16: + case Instruction.CompareAndSwap8: + { + OperandType type = node.GetSource(1).Type; + + if (type == OperandType.V128) + { + // Handle the many restrictions of the compare and exchange (16 bytes) instruction: + // - The expected value should be in RDX:RAX. + // - The new value to be written should be in RCX:RBX. + // - The value at the memory location is loaded to RDX:RAX. + void SplitOperand(Operand source, Operand lr, Operand hr) + { + nodes.AddBefore(node, Operation(Instruction.VectorExtract, lr, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, hr, source, Const(1))); + } + + Operand rax = Gpr(X86Register.Rax, OperandType.I64); + Operand rbx = Gpr(X86Register.Rbx, OperandType.I64); + Operand rcx = Gpr(X86Register.Rcx, OperandType.I64); + Operand rdx = Gpr(X86Register.Rdx, OperandType.I64); + + SplitOperand(node.GetSource(1), rax, rdx); + SplitOperand(node.GetSource(2), rbx, rcx); + + Operation operation = node; + + node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, rax)); + nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, rdx, Const(1))); + + operation.SetDestinations(new Operand[] { rdx, rax }); + operation.SetSources(new Operand[] { operation.GetSource(0), rdx, rax, rcx, rbx }); + } + else + { + // Handle the many restrictions of the compare and exchange (32/64) instruction: + // - The expected value should be in (E/R)AX. + // - The value at the memory location is loaded to (E/R)AX. + Operand expected = node.GetSource(1); + Operand newValue = node.GetSource(2); + + Operand rax = Gpr(X86Register.Rax, expected.Type); + + nodes.AddBefore(node, Operation(Instruction.Copy, rax, expected)); + + // We need to store the new value into a temp, since it may + // be a constant, and this instruction does not support immediate operands. + Operand temp = Local(newValue.Type); + + nodes.AddBefore(node, Operation(Instruction.Copy, temp, newValue)); + + node.SetSources(new Operand[] { node.GetSource(0), rax, temp }); + + nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax)); + + node.Destination = rax; + } + + break; + } + + case Instruction.Divide: + case Instruction.DivideUI: + { + // Handle the many restrictions of the division instructions: + // - The dividend is always in RDX:RAX. + // - The result is always in RAX. + // - Additionally it also writes the remainder in RDX. 
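+ // e.g. a 64-bit "q = a / b" becomes: copy a into rax, clobber rdx (it
+ // holds the upper half of the 128-bit dividend), divide by b, then copy
+ // the quotient back out of rax; the remainder is left behind in rdx.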
+ if (dest.Type.IsInteger()) + { + Operand src1 = node.GetSource(0); + + Operand rax = Gpr(X86Register.Rax, src1.Type); + Operand rdx = Gpr(X86Register.Rdx, src1.Type); + + nodes.AddBefore(node, Operation(Instruction.Copy, rax, src1)); + nodes.AddBefore(node, Operation(Instruction.Clobber, rdx)); + + nodes.AddAfter(node, Operation(Instruction.Copy, dest, rax)); + + node.SetSources(new Operand[] { rdx, rax, node.GetSource(1) }); + node.Destination = rax; + } + + break; + } + + case Instruction.Extended: + { + bool isBlend = node.Intrinsic == Intrinsic.X86Blendvpd || + node.Intrinsic == Intrinsic.X86Blendvps || + node.Intrinsic == Intrinsic.X86Pblendvb; + + // BLENDVPD, BLENDVPS, PBLENDVB last operand is always implied to be XMM0 when VEX is not supported. + // SHA256RNDS2 always has an implied XMM0 as a last operand. + if ((isBlend && !HardwareCapabilities.SupportsVexEncoding) || node.Intrinsic == Intrinsic.X86Sha256Rnds2) + { + Operand xmm0 = Xmm(X86Register.Xmm0, OperandType.V128); + + nodes.AddBefore(node, Operation(Instruction.Copy, xmm0, node.GetSource(2))); + + node.SetSource(2, xmm0); + } + + break; + } + + case Instruction.Multiply64HighSI: + case Instruction.Multiply64HighUI: + { + // Handle the many restrictions of the i64 * i64 = i128 multiply instructions: + // - The multiplicand is always in RAX. + // - The lower 64-bits of the result is always in RAX. + // - The higher 64-bits of the result is always in RDX. + Operand src1 = node.GetSource(0); + + Operand rax = Gpr(X86Register.Rax, src1.Type); + Operand rdx = Gpr(X86Register.Rdx, src1.Type); + + nodes.AddBefore(node, Operation(Instruction.Copy, rax, src1)); + + node.SetSource(0, rax); + + nodes.AddAfter(node, Operation(Instruction.Copy, dest, rdx)); + + node.SetDestinations(new Operand[] { rdx, rax }); + + break; + } + + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + { + // The shift register is always implied to be CL (low 8-bits of RCX or ECX). + if (node.GetSource(1).Kind == OperandKind.LocalVariable) + { + Operand rcx = Gpr(X86Register.Rcx, OperandType.I32); + + nodes.AddBefore(node, Operation(Instruction.Copy, rcx, node.GetSource(1))); + + node.SetSource(1, rcx); + } + + break; + } + } + } + + protected static void InsertDestructiveRegCopies(IntrusiveList<Operation> nodes, Operation node) + { + if (node.Destination == default || node.SourcesCount == 0) + { + return; + } + + Instruction inst = node.Instruction; + + Operand dest = node.Destination; + Operand src1 = node.GetSource(0); + + // The multiply instruction (that maps to IMUL) is somewhat special, it has + // a three operand form where the second source is a immediate value. + bool threeOperandForm = inst == Instruction.Multiply && node.GetSource(1).Kind == OperandKind.Constant; + + if (IsSameOperandDestSrc1(node) && src1.Kind == OperandKind.LocalVariable && !threeOperandForm) + { + bool useNewLocal = false; + + for (int srcIndex = 1; srcIndex < node.SourcesCount; srcIndex++) + { + if (node.GetSource(srcIndex) == dest) + { + useNewLocal = true; + + break; + } + } + + if (useNewLocal) + { + // Dest is being used as some source already, we need to use a new + // local to store the temporary value, otherwise the value on dest + // local would be overwritten. 
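+ // For example, "d = s - d": copying s into d first (the usual fix for
+ // two-operand forms) would destroy the d needed as the second operand,
+ // so the operation is routed through a fresh temporary instead.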
+ Operand temp = Local(dest.Type); + + nodes.AddBefore(node, Operation(Instruction.Copy, temp, src1)); + + node.SetSource(0, temp); + + nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp)); + + node.Destination = temp; + } + else + { + nodes.AddBefore(node, Operation(Instruction.Copy, dest, src1)); + + node.SetSource(0, dest); + } + } + else if (inst == Instruction.ConditionalSelect) + { + Operand src2 = node.GetSource(1); + Operand src3 = node.GetSource(2); + + if (src1 == dest || src2 == dest) + { + Operand temp = Local(dest.Type); + + nodes.AddBefore(node, Operation(Instruction.Copy, temp, src3)); + + node.SetSource(2, temp); + + nodes.AddAfter(node, Operation(Instruction.Copy, dest, temp)); + + node.Destination = temp; + } + else + { + nodes.AddBefore(node, Operation(Instruction.Copy, dest, src3)); + + node.SetSource(2, dest); + } + } + } + + private static void GenerateConvertToFPUI(IntrusiveList<Operation> nodes, Operation node) + { + // Unsigned integer to FP conversions are not supported on X86. + // We need to turn them into signed integer to FP conversions, and + // adjust the final result. + Operand dest = node.Destination; + Operand source = node.GetSource(0); + + Debug.Assert(source.Type.IsInteger(), $"Invalid source type \"{source.Type}\"."); + + Operation currentNode = node; + + if (source.Type == OperandType.I32) + { + // For 32-bits integers, we can just zero-extend to 64-bits, + // and then use the 64-bits signed conversion instructions. + Operand zex = Local(OperandType.I64); + + node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend32, zex, source)); + node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, dest, zex)); + } + else /* if (source.Type == OperandType.I64) */ + { + // For 64-bits integers, we need to do the following: + // - Ensure that the integer has the most significant bit clear. + // -- This can be done by shifting the value right by 1, that is, dividing by 2. + // -- The least significant bit is lost in this case though. + // - We can then convert the shifted value with a signed integer instruction. + // - The result still needs to be corrected after that. + // -- First, we need to multiply the result by 2, as we divided it by 2 before. + // --- This can be done efficiently by adding the result to itself. + // -- Then, we need to add the least significant bit that was shifted out. + // --- We can convert the least significant bit to float, and add it to the result. + Operand lsb = Local(OperandType.I64); + Operand half = Local(OperandType.I64); + + Operand lsbF = Local(dest.Type); + + node = nodes.AddAfter(node, Operation(Instruction.Copy, lsb, source)); + node = nodes.AddAfter(node, Operation(Instruction.Copy, half, source)); + + node = nodes.AddAfter(node, Operation(Instruction.BitwiseAnd, lsb, lsb, Const(1L))); + node = nodes.AddAfter(node, Operation(Instruction.ShiftRightUI, half, half, Const(1))); + + node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, lsbF, lsb)); + node = nodes.AddAfter(node, Operation(Instruction.ConvertToFP, dest, half)); + + node = nodes.AddAfter(node, Operation(Instruction.Add, dest, dest, dest)); + nodes.AddAfter(node, Operation(Instruction.Add, dest, dest, lsbF)); + } + + Delete(nodes, currentNode); + } + + private static void GenerateNegate(IntrusiveList<Operation> nodes, Operation node) + { + // There's no SSE FP negate instruction, so we need to transform that into + // a XOR of the value to be negated with a mask with the highest bit set. 
+ // This also produces -0 for a negation of the value 0. + Operand dest = node.Destination; + Operand source = node.GetSource(0); + + Debug.Assert(dest.Type == OperandType.FP32 || + dest.Type == OperandType.FP64, $"Invalid destination type \"{dest.Type}\"."); + + Operation currentNode = node; + + Operand res = Local(dest.Type); + + node = nodes.AddAfter(node, Operation(Instruction.VectorOne, res)); + + if (dest.Type == OperandType.FP32) + { + node = nodes.AddAfter(node, Operation(Intrinsic.X86Pslld, res, res, Const(31))); + } + else /* if (dest.Type == OperandType.FP64) */ + { + node = nodes.AddAfter(node, Operation(Intrinsic.X86Psllq, res, res, Const(63))); + } + + node = nodes.AddAfter(node, Operation(Intrinsic.X86Xorps, res, res, source)); + + nodes.AddAfter(node, Operation(Instruction.Copy, dest, res)); + + Delete(nodes, currentNode); + } + + private static void GenerateVectorInsert8(IntrusiveList<Operation> nodes, Operation node) + { + // Handle vector insertion, when SSE 4.1 is not supported. + Operand dest = node.Destination; + Operand src1 = node.GetSource(0); // Vector + Operand src2 = node.GetSource(1); // Value + Operand src3 = node.GetSource(2); // Index + + Debug.Assert(src3.Kind == OperandKind.Constant); + + byte index = src3.AsByte(); + + Debug.Assert(index < 16); + + Operation currentNode = node; + + Operand temp1 = Local(OperandType.I32); + Operand temp2 = Local(OperandType.I32); + + node = nodes.AddAfter(node, Operation(Instruction.Copy, temp2, src2)); + + Operation vextOp = Operation(Instruction.VectorExtract16, temp1, src1, Const(index >> 1)); + + node = nodes.AddAfter(node, vextOp); + + if ((index & 1) != 0) + { + node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend8, temp1, temp1)); + node = nodes.AddAfter(node, Operation(Instruction.ShiftLeft, temp2, temp2, Const(8))); + node = nodes.AddAfter(node, Operation(Instruction.BitwiseOr, temp1, temp1, temp2)); + } + else + { + node = nodes.AddAfter(node, Operation(Instruction.ZeroExtend8, temp2, temp2)); + node = nodes.AddAfter(node, Operation(Instruction.BitwiseAnd, temp1, temp1, Const(0xff00))); + node = nodes.AddAfter(node, Operation(Instruction.BitwiseOr, temp1, temp1, temp2)); + } + + Operation vinsOp = Operation(Instruction.VectorInsert16, dest, src1, temp1, Const(index >> 1)); + + nodes.AddAfter(node, vinsOp); + + Delete(nodes, currentNode); + } + + protected static Operand AddXmmCopy(IntrusiveList<Operation> nodes, Operation node, Operand source) + { + Operand temp = Local(source.Type); + Operand intConst = AddCopy(nodes, node, GetIntConst(source)); + + Operation copyOp = Operation(Instruction.VectorCreateScalar, temp, intConst); + + nodes.AddBefore(node, copyOp); + + return temp; + } + + protected static Operand AddCopy(IntrusiveList<Operation> nodes, Operation node, Operand source) + { + Operand temp = Local(source.Type); + + Operation copyOp = Operation(Instruction.Copy, temp, source); + + nodes.AddBefore(node, copyOp); + + return temp; + } + + private static Operand GetIntConst(Operand value) + { + if (value.Type == OperandType.FP32) + { + return Const(value.AsInt32()); + } + else if (value.Type == OperandType.FP64) + { + return Const(value.AsInt64()); + } + + return value; + } + + protected static void Delete(IntrusiveList<Operation> nodes, Operation node) + { + node.Destination = default; + + for (int index = 0; index < node.SourcesCount; index++) + { + node.SetSource(index, default); + } + + nodes.Remove(node); + } + + protected static Operand Gpr(X86Register register, OperandType type) + { + return 
Register((int)register, RegisterType.Integer, type); + } + + protected static Operand Xmm(X86Register register, OperandType type) + { + return Register((int)register, RegisterType.Vector, type); + } + + private static bool IsSameOperandDestSrc1(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Add: + return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger(); + case Instruction.Multiply: + case Instruction.Subtract: + return !HardwareCapabilities.SupportsVexEncoding || operation.Destination.Type.IsInteger(); + + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseNot: + case Instruction.BitwiseOr: + case Instruction.ByteSwap: + case Instruction.Negate: + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + return true; + + case Instruction.Divide: + return !HardwareCapabilities.SupportsVexEncoding && !operation.Destination.Type.IsInteger(); + + case Instruction.VectorInsert: + case Instruction.VectorInsert16: + case Instruction.VectorInsert8: + return !HardwareCapabilities.SupportsVexEncoding; + + case Instruction.Extended: + return IsIntrinsicSameOperandDestSrc1(operation); + } + + return IsVexSameOperandDestSrc1(operation); + } + + private static bool IsIntrinsicSameOperandDestSrc1(Operation operation) + { + IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic); + + return info.Type == IntrinsicType.Crc32 || info.Type == IntrinsicType.Fma || IsVexSameOperandDestSrc1(operation); + } + + private static bool IsVexSameOperandDestSrc1(Operation operation) + { + if (IsIntrinsic(operation.Instruction)) + { + IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic); + + bool hasVex = HardwareCapabilities.SupportsVexEncoding && Assembler.SupportsVexPrefix(info.Inst); + + bool isUnary = operation.SourcesCount < 2; + + bool hasVecDest = operation.Destination != default && operation.Destination.Type == OperandType.V128; + + return !hasVex && !isUnary && hasVecDest; + } + + return false; + } + + private static bool HasConstSrc1(Instruction inst) + { + switch (inst) + { + case Instruction.Copy: + case Instruction.LoadArgument: + case Instruction.Spill: + case Instruction.SpillArg: + return true; + } + + return false; + } + + private static bool HasConstSrc2(Instruction inst) + { + switch (inst) + { + case Instruction.Add: + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + case Instruction.BranchIf: + case Instruction.Compare: + case Instruction.Multiply: + case Instruction.RotateRight: + case Instruction.ShiftLeft: + case Instruction.ShiftRightSI: + case Instruction.ShiftRightUI: + case Instruction.Store: + case Instruction.Store16: + case Instruction.Store8: + case Instruction.Subtract: + case Instruction.VectorExtract: + case Instruction.VectorExtract16: + case Instruction.VectorExtract8: + return true; + } + + return false; + } + + private static bool IsCommutative(Operation operation) + { + switch (operation.Instruction) + { + case Instruction.Add: + case Instruction.BitwiseAnd: + case Instruction.BitwiseExclusiveOr: + case Instruction.BitwiseOr: + case Instruction.Multiply: + return true; + + case Instruction.BranchIf: + case Instruction.Compare: + { + Operand comp = operation.GetSource(2); + + Debug.Assert(comp.Kind == OperandKind.Constant); + + var compType = (Comparison)comp.AsInt32(); + + return compType == Comparison.Equal || compType == 
Comparison.NotEqual; + } + } + + return false; + } + + private static bool IsIntrinsic(Instruction inst) + { + return inst == Instruction.Extended; + } + + private static bool IsXmmIntrinsic(Operation operation) + { + if (operation.Instruction != Instruction.Extended) + { + return false; + } + + IntrinsicInfo info = IntrinsicTable.GetInfo(operation.Intrinsic); + + return info.Type != IntrinsicType.Crc32; + } + } +}
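Two of the scalar lowerings above lend themselves to a plain C# illustration. The sketch below (an illustration under managed-code semantics, not ARMeilleure code) mirrors GenerateNegate's sign-bit XOR and GenerateConvertToFPUI's halve/convert/correct sequence:

using System;

class PreAllocatorTricksDemo
{
    // XORing an IEEE 754 value with a mask holding only the top bit flips its
    // sign; note it turns +0 into -0, matching the comment in GenerateNegate.
    static float NegateViaXor(float value)
    {
        int bits = BitConverter.SingleToInt32Bits(value);
        return BitConverter.Int32BitsToSingle(bits ^ int.MinValue); // int.MinValue == 1 << 31
    }

    // x86 only converts signed integers, so halve the value (clearing the top
    // bit), convert, double the result, then re-add the bit shifted out.
    static double UlongToDouble(ulong value)
    {
        long lsb = (long)(value & 1);
        long half = (long)(value >> 1); // top bit now clear, safe as signed

        double result = half;
        result += result; // multiply back by 2
        result += lsb;    // correction for the lost least significant bit

        return result;
    }

    static void Main()
    {
        Console.WriteLine(NegateViaXor(1.5f));                 // -1.5
        Console.WriteLine(float.IsNegative(NegateViaXor(0f))); // True: +0 became -0

        ulong big = 0xFFFF_FFFF_FFFF_FFFBUL;

        Console.WriteLine(UlongToDouble(big) == (double)big);  // True for this input
    }
}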
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs b/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs new file mode 100644 index 00000000..a84d5050 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/PreAllocatorSystemV.cs @@ -0,0 +1,334 @@ +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.X86 +{ + class PreAllocatorSystemV : PreAllocator + { + public static void InsertCallCopies(IntrusiveList<Operation> nodes, Operation node) + { + Operand dest = node.Destination; + + List<Operand> sources = new List<Operand> + { + node.GetSource(0) + }; + + int argsCount = node.SourcesCount - 1; + + int intMax = CallingConvention.GetIntArgumentsOnRegsCount(); + int vecMax = CallingConvention.GetVecArgumentsOnRegsCount(); + + int intCount = 0; + int vecCount = 0; + + int stackOffset = 0; + + for (int index = 0; index < argsCount; index++) + { + Operand source = node.GetSource(index + 1); + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount < intMax; + } + else if (source.Type == OperandType.V128) + { + passOnReg = intCount + 1 < intMax; + } + else + { + passOnReg = vecCount < vecMax; + } + + if (source.Type == OperandType.V128 && passOnReg) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp)); + + sources.Add(argReg); + } + else + { + Operand offset = Const(stackOffset); + + Operation spillOp = Operation(Instruction.SpillArg, default, offset, source); + + InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp)); + + stackOffset += source.Type.GetSizeInBytes(); + } + } + + node.SetSources(sources.ToArray()); + + if (dest != default) + { + if (dest.Type == OperandType.V128) + { + Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64); + + Operation operation = node; + + node = nodes.AddAfter(node, Operation(Instruction.VectorCreateScalar, dest, retLReg)); + nodes.AddAfter(node, Operation(Instruction.VectorInsert, dest, dest, retHReg, Const(1))); + + operation.Destination = default; + } + else + { + Operand retReg = dest.Type.IsInteger() + ? 
Gpr(CallingConvention.GetIntReturnRegister(), dest.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, dest, retReg); + + nodes.AddAfter(node, copyOp); + + node.Destination = retReg; + } + } + } + + public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node) + { + List<Operand> sources = new List<Operand> + { + node.GetSource(0) + }; + + int argsCount = node.SourcesCount - 1; + + int intMax = CallingConvention.GetIntArgumentsOnRegsCount(); + int vecMax = CallingConvention.GetVecArgumentsOnRegsCount(); + + int intCount = 0; + int vecCount = 0; + + // Handle arguments passed on registers. + for (int index = 0; index < argsCount; index++) + { + Operand source = node.GetSource(1 + index); + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount + 1 < intMax; + } + else + { + passOnReg = vecCount < vecMax; + } + + if (source.Type == OperandType.V128 && passOnReg) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand argReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + Operand argReg2 = Gpr(CallingConvention.GetIntArgumentRegister(intCount++), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, argReg2, source, Const(1))); + + continue; + } + + if (passOnReg) + { + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount++), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount++), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp)); + + sources.Add(argReg); + } + else + { + throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)"); + } + } + + // The target address must be on the return registers, since we + // don't return anything and it is guaranteed to not be a + // callee saved register (which would be trashed on the epilogue). 
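+ // (On System V that return register is rax, which is caller saved, so the
+ // branch target survives the epilogue's restores of callee saved registers.)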
+ Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + + Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0)); + + nodes.AddBefore(node, addrCopyOp); + + sources[0] = retReg; + + node.SetSources(sources.ToArray()); + } + + public static Operation InsertLoadArgumentCopy( + CompilerContext cctx, + ref Span<Operation> buffer, + IntrusiveList<Operation> nodes, + Operand[] preservedArgs, + Operation node) + { + Operand source = node.GetSource(0); + + Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind."); + + int index = source.AsInt32(); + + int intCount = 0; + int vecCount = 0; + + for (int cIndex = 0; cIndex < index; cIndex++) + { + OperandType argType = cctx.FuncArgTypes[cIndex]; + + if (argType.IsInteger()) + { + intCount++; + } + else if (argType == OperandType.V128) + { + intCount += 2; + } + else + { + vecCount++; + } + } + + bool passOnReg; + + if (source.Type.IsInteger()) + { + passOnReg = intCount < CallingConvention.GetIntArgumentsOnRegsCount(); + } + else if (source.Type == OperandType.V128) + { + passOnReg = intCount + 1 < CallingConvention.GetIntArgumentsOnRegsCount(); + } + else + { + passOnReg = vecCount < CallingConvention.GetVecArgumentsOnRegsCount(); + } + + if (passOnReg) + { + Operand dest = node.Destination; + + if (preservedArgs[index] == default) + { + if (dest.Type == OperandType.V128) + { + // V128 is a struct, we pass each half on a GPR if possible. + Operand pArg = Local(OperandType.V128); + + Operand argLReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount), OperandType.I64); + Operand argHReg = Gpr(CallingConvention.GetIntArgumentRegister(intCount + 1), OperandType.I64); + + Operation copyL = Operation(Instruction.VectorCreateScalar, pArg, argLReg); + Operation copyH = Operation(Instruction.VectorInsert, pArg, pArg, argHReg, Const(1)); + + cctx.Cfg.Entry.Operations.AddFirst(copyH); + cctx.Cfg.Entry.Operations.AddFirst(copyL); + + preservedArgs[index] = pArg; + } + else + { + Operand pArg = Local(dest.Type); + + Operand argReg = dest.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(intCount), dest.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(vecCount), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, pArg, argReg); + + cctx.Cfg.Entry.Operations.AddFirst(copyOp); + + preservedArgs[index] = pArg; + } + } + + Operation nextNode; + + if (dest.AssignmentsCount == 1) + { + // Let's propagate the argument if we can to avoid copies. + PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]); + nextNode = node.ListNext; + } + else + { + Operation argCopyOp = Operation(Instruction.Copy, dest, preservedArgs[index]); + nextNode = nodes.AddBefore(node, argCopyOp); + } + + Delete(nodes, node); + return nextNode; + } + else + { + // TODO: Pass on stack. + return node; + } + } + + public static void InsertReturnCopy(IntrusiveList<Operation> nodes, Operation node) + { + if (node.SourcesCount == 0) + { + return; + } + + Operand source = node.GetSource(0); + + if (source.Type == OperandType.V128) + { + Operand retLReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + Operand retHReg = Gpr(CallingConvention.GetIntReturnRegisterHigh(), OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.VectorExtract, retLReg, source, Const(0))); + nodes.AddBefore(node, Operation(Instruction.VectorExtract, retHReg, source, Const(1))); + } + else + { + Operand retReg = source.Type.IsInteger() + ? 
Gpr(CallingConvention.GetIntReturnRegister(), source.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), source.Type); + + Operation retCopyOp = Operation(Instruction.Copy, retReg, source); + + nodes.AddBefore(node, retCopyOp); + } + } + } +}
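The pre-allocator above splits a V128 value into two 64-bit halves so each half can travel in its own integer argument register. A minimal standalone sketch of that decomposition, using .NET's Vector128 rather than the IR above (V128SplitDemo and Split are illustrative names, not Ryujinx API):

    using System;
    using System.Runtime.Intrinsics;

    static class V128SplitDemo
    {
        // Two 64-bit lanes stand in for the pair of VectorExtract copies the
        // pre-allocator emits into consecutive integer argument registers.
        static (ulong Lo, ulong Hi) Split(Vector128<ulong> value)
        {
            return (value.GetElement(0), value.GetElement(1));
        }

        static void Main()
        {
            var value = Vector128.Create(0x1122334455667788UL, 0x99AABBCCDDEEFF00UL);
            var (lo, hi) = Split(value);

            Console.WriteLine($"lo=0x{lo:X16} hi=0x{hi:X16}");
        }
    }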
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs b/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs new file mode 100644 index 00000000..45319e6a --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/PreAllocatorWindows.cs @@ -0,0 +1,327 @@ +using ARMeilleure.CodeGen.RegisterAllocators; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.X86 +{ + class PreAllocatorWindows : PreAllocator + { + public static void InsertCallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node) + { + Operand dest = node.Destination; + + // Handle struct arguments. + int retArgs = 0; + int stackAllocOffset = 0; + + int AllocateOnStack(int size) + { + // We assume that the stack allocator is initially empty (TotalSize = 0). + // Taking that into account, we can reuse the space allocated for other + // calls by keeping track of our own allocated size (stackAllocOffset). + // If the space allocated is not big enough, then we just expand it. + int offset = stackAllocOffset; + + if (stackAllocOffset + size > stackAlloc.TotalSize) + { + stackAlloc.Allocate((stackAllocOffset + size) - stackAlloc.TotalSize); + } + + stackAllocOffset += size; + + return offset; + } + + Operand arg0Reg = default; + + if (dest != default && dest.Type == OperandType.V128) + { + int stackOffset = AllocateOnStack(dest.Type.GetSizeInBytes()); + + arg0Reg = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64); + + Operation allocOp = Operation(Instruction.StackAlloc, arg0Reg, Const(stackOffset)); + + nodes.AddBefore(node, allocOp); + + retArgs = 1; + } + + int argsCount = node.SourcesCount - 1; + int maxArgs = CallingConvention.GetArgumentsOnRegsCount() - retArgs; + + if (argsCount > maxArgs) + { + argsCount = maxArgs; + } + + Operand[] sources = new Operand[1 + retArgs + argsCount]; + + sources[0] = node.GetSource(0); + + if (arg0Reg != default) + { + sources[1] = arg0Reg; + } + + for (int index = 1; index < node.SourcesCount; index++) + { + Operand source = node.GetSource(index); + + if (source.Type == OperandType.V128) + { + Operand stackAddr = Local(OperandType.I64); + + int stackOffset = AllocateOnStack(source.Type.GetSizeInBytes()); + + nodes.AddBefore(node, Operation(Instruction.StackAlloc, stackAddr, Const(stackOffset))); + + Operation storeOp = Operation(Instruction.Store, default, stackAddr, source); + + InsertConstantRegCopies(nodes, nodes.AddBefore(node, storeOp)); + + node.SetSource(index, stackAddr); + } + } + + // Handle arguments passed on registers. + for (int index = 0; index < argsCount; index++) + { + Operand source = node.GetSource(index + 1); + Operand argReg; + + int argIndex = index + retArgs; + + if (source.Type.IsInteger()) + { + argReg = Gpr(CallingConvention.GetIntArgumentRegister(argIndex), source.Type); + } + else + { + argReg = Xmm(CallingConvention.GetVecArgumentRegister(argIndex), source.Type); + } + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp)); + + sources[1 + retArgs + index] = argReg; + } + + // The remaining arguments (those that are not passed on registers) + // should be passed on the stack, we write them to the stack with "SpillArg". 
+ for (int index = argsCount; index < node.SourcesCount - 1; index++) + { + Operand source = node.GetSource(index + 1); + Operand offset = Const((index + retArgs) * 8); + + Operation spillOp = Operation(Instruction.SpillArg, default, offset, source); + + InsertConstantRegCopies(nodes, nodes.AddBefore(node, spillOp)); + } + + if (dest != default) + { + if (dest.Type == OperandType.V128) + { + Operand retValueAddr = Local(OperandType.I64); + + nodes.AddBefore(node, Operation(Instruction.Copy, retValueAddr, arg0Reg)); + + Operation loadOp = Operation(Instruction.Load, dest, retValueAddr); + + nodes.AddAfter(node, loadOp); + + node.Destination = default; + } + else + { + Operand retReg = dest.Type.IsInteger() + ? Gpr(CallingConvention.GetIntReturnRegister(), dest.Type) + : Xmm(CallingConvention.GetVecReturnRegister(), dest.Type); + + Operation copyOp = Operation(Instruction.Copy, dest, retReg); + + nodes.AddAfter(node, copyOp); + + node.Destination = retReg; + } + } + + node.SetSources(sources); + } + + public static void InsertTailcallCopies(IntrusiveList<Operation> nodes, StackAllocator stackAlloc, Operation node) + { + int argsCount = node.SourcesCount - 1; + int maxArgs = CallingConvention.GetArgumentsOnRegsCount(); + + if (argsCount > maxArgs) + { + throw new NotImplementedException("Spilling is not currently supported for tail calls. (too many arguments)"); + } + + Operand[] sources = new Operand[1 + argsCount]; + + // Handle arguments passed on registers. + for (int index = 0; index < argsCount; index++) + { + Operand source = node.GetSource(1 + index); + Operand argReg = source.Type.IsInteger() + ? Gpr(CallingConvention.GetIntArgumentRegister(index), source.Type) + : Xmm(CallingConvention.GetVecArgumentRegister(index), source.Type); + + Operation copyOp = Operation(Instruction.Copy, argReg, source); + + InsertConstantRegCopies(nodes, nodes.AddBefore(node, copyOp)); + + sources[1 + index] = argReg; + } + + // The target address must be on the return registers, since we + // don't return anything and it is guaranteed to not be a + // callee saved register (which would be trashed on the epilogue). + Operand retReg = Gpr(CallingConvention.GetIntReturnRegister(), OperandType.I64); + + Operation addrCopyOp = Operation(Instruction.Copy, retReg, node.GetSource(0)); + + nodes.AddBefore(node, addrCopyOp); + + sources[0] = retReg; + + node.SetSources(sources); + } + + public static Operation InsertLoadArgumentCopy( + CompilerContext cctx, + ref Span<Operation> buffer, + IntrusiveList<Operation> nodes, + Operand[] preservedArgs, + Operation node) + { + Operand source = node.GetSource(0); + + Debug.Assert(source.Kind == OperandKind.Constant, "Non-constant LoadArgument source kind."); + + int retArgs = cctx.FuncReturnType == OperandType.V128 ? 
1 : 0; + + int index = source.AsInt32() + retArgs; + + if (index < CallingConvention.GetArgumentsOnRegsCount()) + { + Operand dest = node.Destination; + + if (preservedArgs[index] == default) + { + Operand argReg, pArg; + + if (dest.Type.IsInteger()) + { + argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), dest.Type); + pArg = Local(dest.Type); + } + else if (dest.Type == OperandType.V128) + { + argReg = Gpr(CallingConvention.GetIntArgumentRegister(index), OperandType.I64); + pArg = Local(OperandType.I64); + } + else + { + argReg = Xmm(CallingConvention.GetVecArgumentRegister(index), dest.Type); + pArg = Local(dest.Type); + } + + Operation copyOp = Operation(Instruction.Copy, pArg, argReg); + + cctx.Cfg.Entry.Operations.AddFirst(copyOp); + + preservedArgs[index] = pArg; + } + + Operation nextNode; + + if (dest.Type != OperandType.V128 && dest.AssignmentsCount == 1) + { + // Let's propagate the argument if we can to avoid copies. + PreAllocatorCommon.Propagate(ref buffer, dest, preservedArgs[index]); + nextNode = node.ListNext; + } + else + { + Operation argCopyOp = Operation(dest.Type == OperandType.V128 + ? Instruction.Load + : Instruction.Copy, dest, preservedArgs[index]); + + nextNode = nodes.AddBefore(node, argCopyOp); + } + + Delete(nodes, node); + return nextNode; + } + else + { + // TODO: Pass on stack. + return node; + } + } + + public static void InsertReturnCopy( + CompilerContext cctx, + IntrusiveList<Operation> nodes, + Operand[] preservedArgs, + Operation node) + { + if (node.SourcesCount == 0) + { + return; + } + + Operand source = node.GetSource(0); + Operand retReg; + + if (source.Type.IsInteger()) + { + retReg = Gpr(CallingConvention.GetIntReturnRegister(), source.Type); + } + else if (source.Type == OperandType.V128) + { + if (preservedArgs[0] == default) + { + Operand preservedArg = Local(OperandType.I64); + Operand arg0 = Gpr(CallingConvention.GetIntArgumentRegister(0), OperandType.I64); + + Operation copyOp = Operation(Instruction.Copy, preservedArg, arg0); + + cctx.Cfg.Entry.Operations.AddFirst(copyOp); + + preservedArgs[0] = preservedArg; + } + + retReg = preservedArgs[0]; + } + else + { + retReg = Xmm(CallingConvention.GetVecReturnRegister(), source.Type); + } + + if (source.Type == OperandType.V128) + { + Operation retStoreOp = Operation(Instruction.Store, default, retReg, source); + + nodes.AddBefore(node, retStoreOp); + } + else + { + Operation retCopyOp = Operation(Instruction.Copy, retReg, source); + + nodes.AddBefore(node, retCopyOp); + } + + node.SetSources(Array.Empty<Operand>()); + } + } +}
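Note how retArgs above shifts every argument index when a V128 is returned: the hidden return-buffer pointer claims the first Windows x64 argument register. A hedged sketch of just that index arithmetic (PhysicalArgIndex is a hypothetical helper, not part of the code above):

    using System;

    static class WinX64ArgIndexDemo
    {
        // Windows x64 integer argument registers, in slot order.
        static readonly string[] IntArgRegs = { "rcx", "rdx", "r8", "r9" };

        // When the callee returns a V128, slot 0 carries the hidden pointer to
        // the return buffer, and every logical argument moves one slot right.
        static int PhysicalArgIndex(int logicalIndex, bool returnsV128)
        {
            int retArgs = returnsV128 ? 1 : 0;

            return logicalIndex + retArgs;
        }

        static void Main()
        {
            Console.WriteLine(IntArgRegs[PhysicalArgIndex(0, returnsV128: false)]); // rcx
            Console.WriteLine(IntArgRegs[PhysicalArgIndex(0, returnsV128: true)]);  // rdx
        }
    }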
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/X86Condition.cs b/src/ARMeilleure/CodeGen/X86/X86Condition.cs new file mode 100644 index 00000000..c82cbdec --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/X86Condition.cs @@ -0,0 +1,47 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + +namespace ARMeilleure.CodeGen.X86 +{ + enum X86Condition + { + Overflow = 0x0, + NotOverflow = 0x1, + Below = 0x2, + AboveOrEqual = 0x3, + Equal = 0x4, + NotEqual = 0x5, + BelowOrEqual = 0x6, + Above = 0x7, + Sign = 0x8, + NotSign = 0x9, + ParityEven = 0xa, + ParityOdd = 0xb, + Less = 0xc, + GreaterOrEqual = 0xd, + LessOrEqual = 0xe, + Greater = 0xf + } + + static class ComparisonX86Extensions + { + public static X86Condition ToX86Condition(this Comparison comp) + { + return comp switch + { + Comparison.Equal => X86Condition.Equal, + Comparison.NotEqual => X86Condition.NotEqual, + Comparison.Greater => X86Condition.Greater, + Comparison.LessOrEqual => X86Condition.LessOrEqual, + Comparison.GreaterUI => X86Condition.Above, + Comparison.LessOrEqualUI => X86Condition.BelowOrEqual, + Comparison.GreaterOrEqual => X86Condition.GreaterOrEqual, + Comparison.Less => X86Condition.Less, + Comparison.GreaterOrEqualUI => X86Condition.AboveOrEqual, + Comparison.LessUI => X86Condition.Below, + + _ => throw new ArgumentException(null, nameof(comp)) + }; + } + } +}
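The enum values above are the raw x86 condition-code nibbles, which is what makes the table useful beyond a simple mapping: a near conditional jump encodes as 0F 80+cc followed by a rel32 displacement. A small sketch under that assumption (the local Cond enum copies a few members of X86Condition for self-containment; EncodeJcc is illustrative, not the emitter this code uses):

    using System;

    static class JccEncodingDemo
    {
        // Subset of the X86Condition values above.
        enum Cond { Below = 0x2, Equal = 0x4, NotEqual = 0x5 }

        // Near Jcc: 0F 80+cc, followed by a signed 32-bit displacement.
        static byte[] EncodeJcc(Cond cond, int rel32)
        {
            var buffer = new byte[6];

            buffer[0] = 0x0F;
            buffer[1] = (byte)(0x80 | (int)cond);

            BitConverter.GetBytes(rel32).CopyTo(buffer, 2);

            return buffer;
        }

        static void Main()
        {
            // JE +0x10 -> 0F 84 10 00 00 00.
            Console.WriteLine(Convert.ToHexString(EncodeJcc(Cond.Equal, 0x10)));
        }
    }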
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/X86Instruction.cs b/src/ARMeilleure/CodeGen/X86/X86Instruction.cs new file mode 100644 index 00000000..9a85c516 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/X86Instruction.cs @@ -0,0 +1,231 @@ +namespace ARMeilleure.CodeGen.X86 +{ + enum X86Instruction + { + None, + Add, + Addpd, + Addps, + Addsd, + Addss, + Aesdec, + Aesdeclast, + Aesenc, + Aesenclast, + Aesimc, + And, + Andnpd, + Andnps, + Andpd, + Andps, + Blendvpd, + Blendvps, + Bsr, + Bswap, + Call, + Cmovcc, + Cmp, + Cmppd, + Cmpps, + Cmpsd, + Cmpss, + Cmpxchg, + Cmpxchg16b, + Cmpxchg8, + Comisd, + Comiss, + Crc32, + Crc32_16, + Crc32_8, + Cvtdq2pd, + Cvtdq2ps, + Cvtpd2dq, + Cvtpd2ps, + Cvtps2dq, + Cvtps2pd, + Cvtsd2si, + Cvtsd2ss, + Cvtsi2sd, + Cvtsi2ss, + Cvtss2sd, + Cvtss2si, + Div, + Divpd, + Divps, + Divsd, + Divss, + Gf2p8affineqb, + Haddpd, + Haddps, + Idiv, + Imul, + Imul128, + Insertps, + Jmp, + Ldmxcsr, + Lea, + Maxpd, + Maxps, + Maxsd, + Maxss, + Minpd, + Minps, + Minsd, + Minss, + Mov, + Mov16, + Mov8, + Movd, + Movdqu, + Movhlps, + Movlhps, + Movq, + Movsd, + Movss, + Movsx16, + Movsx32, + Movsx8, + Movzx16, + Movzx8, + Mul128, + Mulpd, + Mulps, + Mulsd, + Mulss, + Neg, + Not, + Or, + Paddb, + Paddd, + Paddq, + Paddw, + Palignr, + Pand, + Pandn, + Pavgb, + Pavgw, + Pblendvb, + Pclmulqdq, + Pcmpeqb, + Pcmpeqd, + Pcmpeqq, + Pcmpeqw, + Pcmpgtb, + Pcmpgtd, + Pcmpgtq, + Pcmpgtw, + Pextrb, + Pextrd, + Pextrq, + Pextrw, + Pinsrb, + Pinsrd, + Pinsrq, + Pinsrw, + Pmaxsb, + Pmaxsd, + Pmaxsw, + Pmaxub, + Pmaxud, + Pmaxuw, + Pminsb, + Pminsd, + Pminsw, + Pminub, + Pminud, + Pminuw, + Pmovsxbw, + Pmovsxdq, + Pmovsxwd, + Pmovzxbw, + Pmovzxdq, + Pmovzxwd, + Pmulld, + Pmullw, + Pop, + Popcnt, + Por, + Pshufb, + Pshufd, + Pslld, + Pslldq, + Psllq, + Psllw, + Psrad, + Psraw, + Psrld, + Psrlq, + Psrldq, + Psrlw, + Psubb, + Psubd, + Psubq, + Psubw, + Punpckhbw, + Punpckhdq, + Punpckhqdq, + Punpckhwd, + Punpcklbw, + Punpckldq, + Punpcklqdq, + Punpcklwd, + Push, + Pxor, + Rcpps, + Rcpss, + Ror, + Roundpd, + Roundps, + Roundsd, + Roundss, + Rsqrtps, + Rsqrtss, + Sar, + Setcc, + Sha256Msg1, + Sha256Msg2, + Sha256Rnds2, + Shl, + Shr, + Shufpd, + Shufps, + Sqrtpd, + Sqrtps, + Sqrtsd, + Sqrtss, + Stmxcsr, + Sub, + Subpd, + Subps, + Subsd, + Subss, + Test, + Unpckhpd, + Unpckhps, + Unpcklpd, + Unpcklps, + Vblendvpd, + Vblendvps, + Vcvtph2ps, + Vcvtps2ph, + Vfmadd231pd, + Vfmadd231ps, + Vfmadd231sd, + Vfmadd231ss, + Vfmsub231sd, + Vfmsub231ss, + Vfnmadd231pd, + Vfnmadd231ps, + Vfnmadd231sd, + Vfnmadd231ss, + Vfnmsub231sd, + Vfnmsub231ss, + Vpblendvb, + Vpternlogd, + Xor, + Xorpd, + Xorps, + + Count + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs b/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs new file mode 100644 index 00000000..98a19b9a --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/X86Optimizer.cs @@ -0,0 +1,259 @@ +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Collections.Generic; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.CodeGen.X86 +{ + static class X86Optimizer + { + private const int MaxConstantUses = 10000; + + public static void RunPass(ControlFlowGraph cfg) + { + var constants = new Dictionary<ulong, Operand>(); + + Operand GetConstantCopy(BasicBlock block, Operation operation, Operand source) + { + // If the constant has many uses, we also force a new constant mov to be added, in order + // to avoid overflow of the counts field (that is limited to 16 bits). + if (!constants.TryGetValue(source.Value, out var constant) || constant.UsesCount > MaxConstantUses) + { + constant = Local(source.Type); + + Operation copyOp = Operation(Instruction.Copy, constant, source); + + block.Operations.AddBefore(operation, copyOp); + + constants[source.Value] = constant; + } + + return constant; + } + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + constants.Clear(); + + Operation nextNode; + + for (Operation node = block.Operations.First; node != default; node = nextNode) + { + nextNode = node.ListNext; + + // Insert copies for constants that can't fit on a 32-bits immediate. + // Doing this early unblocks a few optimizations. + if (node.Instruction == Instruction.Add) + { + Operand src1 = node.GetSource(0); + Operand src2 = node.GetSource(1); + + if (src1.Kind == OperandKind.Constant && (src1.Relocatable || CodeGenCommon.IsLongConst(src1))) + { + node.SetSource(0, GetConstantCopy(block, node, src1)); + } + + if (src2.Kind == OperandKind.Constant && (src2.Relocatable || CodeGenCommon.IsLongConst(src2))) + { + node.SetSource(1, GetConstantCopy(block, node, src2)); + } + } + + // Try to fold something like: + // shl rbx, 2 + // add rax, rbx + // add rax, 0xcafe + // mov rax, [rax] + // Into: + // mov rax, [rax+rbx*4+0xcafe] + if (IsMemoryLoadOrStore(node.Instruction)) + { + OperandType type; + + if (node.Destination != default) + { + type = node.Destination.Type; + } + else + { + type = node.GetSource(1).Type; + } + + Operand memOp = GetMemoryOperandOrNull(node.GetSource(0), type); + + if (memOp != default) + { + node.SetSource(0, memOp); + } + } + } + } + + Optimizer.RemoveUnusedNodes(cfg); + } + + private static Operand GetMemoryOperandOrNull(Operand addr, OperandType type) + { + Operand baseOp = addr; + + // First we check if the address is the result of a local X with 32-bits immediate + // addition. If that is the case, then the baseOp is X, and the memory operand immediate + // becomes the addition immediate. Otherwise baseOp keeps being the address. + int imm = GetConstOp(ref baseOp); + + // Now we check if the baseOp is the result of a local Y with a local Z addition. + // If that is the case, we now set baseOp to Y and indexOp to Z. We further check + // if Z is the result of a left shift of local W by a value >= 0 and <= 3, if that + // is the case, we set indexOp to W and adjust the scale value of the memory operand + // to match that of the left shift. 
+ // There is one missed case, which is the address being a shift result, but this is + // probably not worth optimizing as it should never happen. + (Operand indexOp, Multiplier scale) = GetIndexOp(ref baseOp); + + // If baseOp is still equal to address, then there's nothing that can be optimized. + if (baseOp == addr) + { + return default; + } + + if (imm == 0 && scale == Multiplier.x1 && indexOp != default) + { + imm = GetConstOp(ref indexOp); + } + + return MemoryOp(type, baseOp, indexOp, scale, imm); + } + + private static int GetConstOp(ref Operand baseOp) + { + Operation operation = GetAsgOpWithInst(baseOp, Instruction.Add); + + if (operation == default) + { + return 0; + } + + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + Operand constOp; + Operand otherOp; + + if (src1.Kind == OperandKind.Constant && src2.Kind == OperandKind.LocalVariable) + { + constOp = src1; + otherOp = src2; + } + else if (src1.Kind == OperandKind.LocalVariable && src2.Kind == OperandKind.Constant) + { + constOp = src2; + otherOp = src1; + } + else + { + return 0; + } + + // If we have addition by 64-bits constant, then we can't optimize it further, + // as we can't encode a 64-bits immediate on the memory operand. + if (CodeGenCommon.IsLongConst(constOp)) + { + return 0; + } + + baseOp = otherOp; + + return constOp.AsInt32(); + } + + private static (Operand, Multiplier) GetIndexOp(ref Operand baseOp) + { + Operand indexOp = default; + + Multiplier scale = Multiplier.x1; + + Operation addOp = GetAsgOpWithInst(baseOp, Instruction.Add); + + if (addOp == default) + { + return (indexOp, scale); + } + + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Kind != OperandKind.LocalVariable || src2.Kind != OperandKind.LocalVariable) + { + return (indexOp, scale); + } + + baseOp = src1; + indexOp = src2; + + Operation shlOp = GetAsgOpWithInst(src1, Instruction.ShiftLeft); + + bool indexOnSrc2 = false; + + if (shlOp == default) + { + shlOp = GetAsgOpWithInst(src2, Instruction.ShiftLeft); + + indexOnSrc2 = true; + } + + if (shlOp != default) + { + Operand shSrc = shlOp.GetSource(0); + Operand shift = shlOp.GetSource(1); + + if (shSrc.Kind == OperandKind.LocalVariable && shift.Kind == OperandKind.Constant && shift.Value <= 3) + { + scale = shift.Value switch + { + 1 => Multiplier.x2, + 2 => Multiplier.x4, + 3 => Multiplier.x8, + _ => Multiplier.x1 + }; + + baseOp = indexOnSrc2 ? src1 : src2; + indexOp = shSrc; + } + } + + return (indexOp, scale); + } + + private static Operation GetAsgOpWithInst(Operand op, Instruction inst) + { + // If we have multiple assignments, folding is not safe + // as the value may be different depending on the + // control flow path. 
+ if (op.AssignmentsCount != 1) + { + return default; + } + + Operation asgOp = op.Assignments[0]; + + if (asgOp.Instruction != inst) + { + return default; + } + + return asgOp; + } + + private static bool IsMemoryLoadOrStore(Instruction inst) + { + return inst == Instruction.Load || + inst == Instruction.Load16 || + inst == Instruction.Load8 || + inst == Instruction.Store || + inst == Instruction.Store16 || + inst == Instruction.Store8; + } + } +} diff --git a/src/ARMeilleure/CodeGen/X86/X86Register.cs b/src/ARMeilleure/CodeGen/X86/X86Register.cs new file mode 100644 index 00000000..01f63e31 --- /dev/null +++ b/src/ARMeilleure/CodeGen/X86/X86Register.cs @@ -0,0 +1,41 @@ +namespace ARMeilleure.CodeGen.X86 +{ + enum X86Register + { + Invalid = -1, + + Rax = 0, + Rcx = 1, + Rdx = 2, + Rbx = 3, + Rsp = 4, + Rbp = 5, + Rsi = 6, + Rdi = 7, + R8 = 8, + R9 = 9, + R10 = 10, + R11 = 11, + R12 = 12, + R13 = 13, + R14 = 14, + R15 = 15, + + Xmm0 = 0, + Xmm1 = 1, + Xmm2 = 2, + Xmm3 = 3, + Xmm4 = 4, + Xmm5 = 5, + Xmm6 = 6, + Xmm7 = 7, + Xmm8 = 8, + Xmm9 = 9, + Xmm10 = 10, + Xmm11 = 11, + Xmm12 = 12, + Xmm13 = 13, + Xmm14 = 14, + Xmm15 = 15 + } +}
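The X86Optimizer pass above folds shift-and-add chains into a single [base + index*scale + disp] memory operand; the address computed is unchanged, only the instruction count drops. A few lines suffice to check the arithmetic against the comment's example (EffectiveAddress is a hypothetical helper):

    using System;

    static class AddressingDemo
    {
        // x86 addressing: base + index * scale + disp, with scale in {1, 2, 4, 8}
        // (the Multiplier values the pass selects from the shift amount).
        static ulong EffectiveAddress(ulong baseVal, ulong index, int scale, int disp)
        {
            return baseVal + index * (ulong)scale + (ulong)(long)disp;
        }

        static void Main()
        {
            // "shl rbx, 2" folds into scale 4; 0xcafe becomes the displacement.
            ulong rax = 0x1000, rbx = 0x10;

            Console.WriteLine($"0x{EffectiveAddress(rax, rbx, 4, 0xcafe):X}"); // 0xDB3E
        }
    }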
\ No newline at end of file diff --git a/src/ARMeilleure/Common/AddressTable.cs b/src/ARMeilleure/Common/AddressTable.cs new file mode 100644 index 00000000..9db2d00d --- /dev/null +++ b/src/ARMeilleure/Common/AddressTable.cs @@ -0,0 +1,252 @@ +using ARMeilleure.Diagnostics; +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Common +{ + /// <summary> + /// Represents a table mapping a guest address to a value. + /// </summary> + /// <typeparam name="TEntry">Type of the value</typeparam> + unsafe class AddressTable<TEntry> : IDisposable where TEntry : unmanaged + { + /// <summary> + /// Represents a level in an <see cref="AddressTable{TEntry}"/>. + /// </summary> + public readonly struct Level + { + /// <summary> + /// Gets the index of the <see cref="Level"/> in the guest address. + /// </summary> + public int Index { get; } + + /// <summary> + /// Gets the length of the <see cref="Level"/> in the guest address. + /// </summary> + public int Length { get; } + + /// <summary> + /// Gets the mask which masks the bits used by the <see cref="Level"/>. + /// </summary> + public ulong Mask => ((1ul << Length) - 1) << Index; + + /// <summary> + /// Initializes a new instance of the <see cref="Level"/> structure with the specified + /// <paramref name="index"/> and <paramref name="length"/>. + /// </summary> + /// <param name="index">Index of the <see cref="Level"/></param> + /// <param name="length">Length of the <see cref="Level"/></param> + public Level(int index, int length) + { + (Index, Length) = (index, length); + } + + /// <summary> + /// Gets the value of the <see cref="Level"/> from the specified guest <paramref name="address"/>. + /// </summary> + /// <param name="address">Guest address</param> + /// <returns>Value of the <see cref="Level"/> from the specified guest <paramref name="address"/></returns> + public int GetValue(ulong address) + { + return (int)((address & Mask) >> Index); + } + } + + private bool _disposed; + private TEntry** _table; + private readonly List<IntPtr> _pages; + + /// <summary> + /// Gets the bits used by the <see cref="Levels"/> of the <see cref="AddressTable{TEntry}"/> instance. + /// </summary> + public ulong Mask { get; } + + /// <summary> + /// Gets the <see cref="Level"/>s used by the <see cref="AddressTable{TEntry}"/> instance. + /// </summary> + public Level[] Levels { get; } + + /// <summary> + /// Gets or sets the default fill value of newly created leaf pages. + /// </summary> + public TEntry Fill { get; set; } + + /// <summary> + /// Gets the base address of the <see cref="AddressTable{TEntry}"/>. + /// </summary> + /// <exception cref="ObjectDisposedException"><see cref="AddressTable{TEntry}"/> instance was disposed</exception> + public IntPtr Base + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + lock (_pages) + { + return (IntPtr)GetRootPage(); + } + } + } + + /// <summary> + /// Constructs a new instance of the <see cref="AddressTable{TEntry}"/> class with the specified list of + /// <see cref="Level"/>.
+ /// </summary> + /// <exception cref="ArgumentNullException"><paramref name="levels"/> is null</exception> + /// <exception cref="ArgumentException">Length of <paramref name="levels"/> is less than 2</exception> + public AddressTable(Level[] levels) + { + ArgumentNullException.ThrowIfNull(levels); + + if (levels.Length < 2) + { + throw new ArgumentException("Table must be at least 2 levels deep.", nameof(levels)); + } + + _pages = new List<IntPtr>(capacity: 16); + + Levels = levels; + Mask = 0; + + foreach (var level in Levels) + { + Mask |= level.Mask; + } + } + + /// <summary> + /// Determines if the specified <paramref name="address"/> is in the range of the + /// <see cref="AddressTable{TEntry}"/>. + /// </summary> + /// <param name="address">Guest address</param> + /// <returns><see langword="true"/> if valid; otherwise <see langword="false"/></returns> + public bool IsValid(ulong address) + { + return (address & ~Mask) == 0; + } + + /// <summary> + /// Gets a reference to the value at the specified guest <paramref name="address"/>. + /// </summary> + /// <param name="address">Guest address</param> + /// <returns>Reference to the value at the specified guest <paramref name="address"/></returns> + /// <exception cref="ObjectDisposedException"><see cref="AddressTable{TEntry}"/> instance was disposed</exception> + /// <exception cref="ArgumentException"><paramref name="address"/> is not mapped</exception> + public ref TEntry GetValue(ulong address) + { + ObjectDisposedException.ThrowIf(_disposed, this); + + if (!IsValid(address)) + { + throw new ArgumentException($"Address 0x{address:X} is not mapped onto the table.", nameof(address)); + } + + lock (_pages) + { + return ref GetPage(address)[Levels[^1].GetValue(address)]; + } + } + + /// <summary> + /// Gets the leaf page for the specified guest <paramref name="address"/>. + /// </summary> + /// <param name="address">Guest address</param> + /// <returns>Leaf page for the specified guest <paramref name="address"/></returns> + private TEntry* GetPage(ulong address) + { + TEntry** page = GetRootPage(); + + for (int i = 0; i < Levels.Length - 1; i++) + { + ref Level level = ref Levels[i]; + ref TEntry* nextPage = ref page[level.GetValue(address)]; + + if (nextPage == null) + { + ref Level nextLevel = ref Levels[i + 1]; + + nextPage = i == Levels.Length - 2 ? + (TEntry*)Allocate(1 << nextLevel.Length, Fill, leaf: true) : + (TEntry*)Allocate(1 << nextLevel.Length, IntPtr.Zero, leaf: false); + } + + page = (TEntry**)nextPage; + } + + return (TEntry*)page; + } + + /// <summary> + /// Lazily initializes and gets the root page of the <see cref="AddressTable{TEntry}"/>. + /// </summary> + /// <returns>Root page of the <see cref="AddressTable{TEntry}"/></returns> + private TEntry** GetRootPage() + { + if (_table == null) + { + _table = (TEntry**)Allocate(1 << Levels[0].Length, fill: IntPtr.Zero, leaf: false); + } + + return _table; + } + + /// <summary> + /// Allocates a block of memory of the specified type and length.
+ /// </summary> + /// <typeparam name="T">Type of elements</typeparam> + /// <param name="length">Number of elements</param> + /// <param name="fill">Fill value</param> + /// <param name="leaf"><see langword="true"/> if leaf; otherwise <see langword="false"/></param> + /// <returns>Allocated block</returns> + private IntPtr Allocate<T>(int length, T fill, bool leaf) where T : unmanaged + { + var size = sizeof(T) * length; + var page = (IntPtr)NativeAllocator.Instance.Allocate((uint)size); + var span = new Span<T>((void*)page, length); + + span.Fill(fill); + + _pages.Add(page); + + TranslatorEventSource.Log.AddressTableAllocated(size, leaf); + + return page; + } + + /// <summary> + /// Releases all resources used by the <see cref="AddressTable{TEntry}"/> instance. + /// </summary> + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + /// <summary> + /// Releases all unmanaged and optionally managed resources used by the <see cref="AddressTable{TEntry}"/> + /// instance. + /// </summary> + /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param> + protected virtual void Dispose(bool disposing) + { + if (!_disposed) + { + foreach (var page in _pages) + { + Marshal.FreeHGlobal(page); + } + + _disposed = true; + } + } + + /// <summary> + /// Frees resources used by the <see cref="AddressTable{TEntry}"/> instance. + /// </summary> + ~AddressTable() + { + Dispose(false); + } + } +} diff --git a/src/ARMeilleure/Common/Allocator.cs b/src/ARMeilleure/Common/Allocator.cs new file mode 100644 index 00000000..247a8e8b --- /dev/null +++ b/src/ARMeilleure/Common/Allocator.cs @@ -0,0 +1,24 @@ +using System; + +namespace ARMeilleure.Common +{ + unsafe abstract class Allocator : IDisposable + { + public T* Allocate<T>(ulong count = 1) where T : unmanaged + { + return (T*)Allocate(count * (uint)sizeof(T)); + } + + public abstract void* Allocate(ulong size); + + public abstract void Free(void* block); + + protected virtual void Dispose(bool disposing) { } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + } +} diff --git a/src/ARMeilleure/Common/ArenaAllocator.cs b/src/ARMeilleure/Common/ArenaAllocator.cs new file mode 100644 index 00000000..bce6794a --- /dev/null +++ b/src/ARMeilleure/Common/ArenaAllocator.cs @@ -0,0 +1,187 @@ +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; + +namespace ARMeilleure.Common +{ + unsafe sealed class ArenaAllocator : Allocator + { + private class PageInfo + { + public byte* Pointer; + public byte Unused; + public int UnusedCounter; + } + + private int _lastReset; + private ulong _index; + private int _pageIndex; + private PageInfo _page; + private List<PageInfo> _pages; + private readonly ulong _pageSize; + private readonly uint _pageCount; + private readonly List<IntPtr> _extras; + + public ArenaAllocator(uint pageSize, uint pageCount) + { + _lastReset = Environment.TickCount; + + // Set _index to pageSize so that the first allocation goes through the slow path.
+ _index = pageSize; + _pageIndex = -1; + + _page = null; + _pages = new List<PageInfo>(); + _pageSize = pageSize; + _pageCount = pageCount; + + _extras = new List<IntPtr>(); + } + + public Span<T> AllocateSpan<T>(ulong count) where T : unmanaged + { + return new Span<T>(Allocate<T>(count), (int)count); + } + + public override void* Allocate(ulong size) + { + if (_index + size <= _pageSize) + { + byte* result = _page.Pointer + _index; + + _index += size; + + return result; + } + + return AllocateSlow(size); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + private void* AllocateSlow(ulong size) + { + if (size > _pageSize) + { + void* extra = NativeAllocator.Instance.Allocate(size); + + _extras.Add((IntPtr)extra); + + return extra; + } + + if (_index + size > _pageSize) + { + _index = 0; + _pageIndex++; + } + + if (_pageIndex < _pages.Count) + { + _page = _pages[_pageIndex]; + _page.Unused = 0; + } + else + { + _page = new PageInfo(); + _page.Pointer = (byte*)NativeAllocator.Instance.Allocate(_pageSize); + + _pages.Add(_page); + } + + byte* result = _page.Pointer + _index; + + _index += size; + + return result; + } + + public override void Free(void* block) { } + + public void Reset() + { + _index = _pageSize; + _pageIndex = -1; + _page = null; + + // Free excess pages that were allocated. + while (_pages.Count > _pageCount) + { + NativeAllocator.Instance.Free(_pages[_pages.Count - 1].Pointer); + + _pages.RemoveAt(_pages.Count - 1); + } + + // Free extra blocks that are not page-sized. + foreach (IntPtr ptr in _extras) + { + NativeAllocator.Instance.Free((void*)ptr); + } + + _extras.Clear(); + + // Free pooled pages that have not been used in a while. Remove pages at the back first, because we try to + // keep the pages at the front alive, since they're more likely to be hot and in the d-cache. + bool removing = true; + + // If the arena is used frequently, keep pages for longer. Otherwise, keep pages for a shorter amount of time. + int now = Environment.TickCount; + int count = (now - _lastReset) switch { + >= 5000 => 0, + >= 2500 => 50, + >= 1000 => 100, + >= 10 => 1500, + _ => 5000 + }; + + for (int i = _pages.Count - 1; i >= 0; i--) + { + PageInfo page = _pages[i]; + + if (page.Unused == 0) + { + page.UnusedCounter = 0; + } + + page.UnusedCounter += page.Unused; + page.Unused = 1; + + // If the page has not been used for `count` resets, remove it.
+ if (removing && page.UnusedCounter >= count) + { + NativeAllocator.Instance.Free(page.Pointer); + + _pages.RemoveAt(i); + } + else + { + removing = false; + } + } + + _lastReset = now; + } + + protected override void Dispose(bool disposing) + { + if (_pages != null) + { + foreach (PageInfo info in _pages) + { + NativeAllocator.Instance.Free(info.Pointer); + } + + foreach (IntPtr ptr in _extras) + { + NativeAllocator.Instance.Free((void*)ptr); + } + + _pages = null; + } + } + + ~ArenaAllocator() + { + Dispose(false); + } + } +} diff --git a/src/ARMeilleure/Common/BitMap.cs b/src/ARMeilleure/Common/BitMap.cs new file mode 100644 index 00000000..27ef031f --- /dev/null +++ b/src/ARMeilleure/Common/BitMap.cs @@ -0,0 +1,222 @@ +using System; +using System.Collections; +using System.Collections.Generic; +using System.Numerics; +using System.Runtime.CompilerServices; + +namespace ARMeilleure.Common +{ + unsafe class BitMap : IEnumerable<int>, IDisposable + { + private const int IntSize = 64; + private const int IntMask = IntSize - 1; + + private int _count; + private long* _masks; + private readonly Allocator _allocator; + + public BitMap(Allocator allocator) + { + _allocator = allocator; + } + + public BitMap(Allocator allocator, int capacity) : this(allocator) + { + EnsureCapacity(capacity); + } + + public bool Set(int bit) + { + EnsureCapacity(bit + 1); + + int wordIndex = bit / IntSize; + int wordBit = bit & IntMask; + + long wordMask = 1L << wordBit; + + if ((_masks[wordIndex] & wordMask) != 0) + { + return false; + } + + _masks[wordIndex] |= wordMask; + + return true; + } + + public void Clear(int bit) + { + EnsureCapacity(bit + 1); + + int wordIndex = bit / IntSize; + int wordBit = bit & IntMask; + + long wordMask = 1L << wordBit; + + _masks[wordIndex] &= ~wordMask; + } + + public bool IsSet(int bit) + { + EnsureCapacity(bit + 1); + + int wordIndex = bit / IntSize; + int wordBit = bit & IntMask; + + return (_masks[wordIndex] & (1L << wordBit)) != 0; + } + + public int FindFirstUnset() + { + for (int index = 0; index < _count; index++) + { + long mask = _masks[index]; + + if (mask != -1L) + { + return BitOperations.TrailingZeroCount(~mask) + index * IntSize; + } + } + + return _count * IntSize; + } + + public bool Set(BitMap map) + { + EnsureCapacity(map._count * IntSize); + + bool modified = false; + + for (int index = 0; index < _count; index++) + { + long newValue = _masks[index] | map._masks[index]; + + if (_masks[index] != newValue) + { + _masks[index] = newValue; + + modified = true; + } + } + + return modified; + } + + public bool Clear(BitMap map) + { + EnsureCapacity(map._count * IntSize); + + bool modified = false; + + for (int index = 0; index < _count; index++) + { + long newValue = _masks[index] & ~map._masks[index]; + + if (_masks[index] != newValue) + { + _masks[index] = newValue; + + modified = true; + } + } + + return modified; + } + + private void EnsureCapacity(int size) + { + int count = (size + IntMask) / IntSize; + + if (count > _count) + { + var oldMask = _masks; + var oldSpan = new Span<long>(_masks, _count); + + _masks = _allocator.Allocate<long>((uint)count); + _count = count; + + var newSpan = new Span<long>(_masks, _count); + + oldSpan.CopyTo(newSpan); + newSpan.Slice(oldSpan.Length).Clear(); + + _allocator.Free(oldMask); + } + } + + public void Dispose() + { + if (_masks != null) + { + _allocator.Free(_masks); + + _masks = null; + } + } + + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } + + IEnumerator<int> 
IEnumerable<int>.GetEnumerator() + { + return GetEnumerator(); + } + + public Enumerator GetEnumerator() + { + return new Enumerator(this); + } + + public struct Enumerator : IEnumerator<int> + { + private long _index; + private long _mask; + private int _bit; + private readonly BitMap _map; + + public int Current => (int)_index * IntSize + _bit; + object IEnumerator.Current => Current; + + public Enumerator(BitMap map) + { + _index = -1; + _mask = 0; + _bit = 0; + _map = map; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public bool MoveNext() + { + if (_mask != 0) + { + _mask &= ~(1L << _bit); + } + + // Manually hoist these loads, because RyuJIT does not. + long count = (uint)_map._count; + long* masks = _map._masks; + + while (_mask == 0) + { + if (++_index >= count) + { + return false; + } + + _mask = masks[_index]; + } + + _bit = BitOperations.TrailingZeroCount(_mask); + + return true; + } + + public void Reset() { } + + public void Dispose() { } + } + } +}
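BitMap's enumerator above advances by extracting one set bit per MoveNext with TrailingZeroCount instead of probing every bit position. The same word-at-a-time strategy in a tiny managed form, assuming a plain ulong[] in place of the unmanaged mask array:

    using System;
    using System.Collections.Generic;
    using System.Numerics;

    static class BitScanDemo
    {
        // Yields the indices of set bits, lowest first, one 64-bit word at a time.
        static IEnumerable<int> SetBits(ulong[] words)
        {
            for (int i = 0; i < words.Length; i++)
            {
                ulong mask = words[i];

                while (mask != 0)
                {
                    yield return i * 64 + BitOperations.TrailingZeroCount(mask);

                    mask &= mask - 1; // Clear the lowest set bit.
                }
            }
        }

        static void Main()
        {
            foreach (int bit in SetBits(new[] { 0b1010UL, 1UL }))
            {
                Console.WriteLine(bit); // 1, 3, 64
            }
        }
    }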
\ No newline at end of file diff --git a/src/ARMeilleure/Common/BitUtils.cs b/src/ARMeilleure/Common/BitUtils.cs new file mode 100644 index 00000000..e7697ff3 --- /dev/null +++ b/src/ARMeilleure/Common/BitUtils.cs @@ -0,0 +1,57 @@ +using System; +using System.Numerics; + +namespace ARMeilleure.Common +{ + static class BitUtils + { + private static ReadOnlySpan<sbyte> HbsNibbleLut => new sbyte[] { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3 }; + + public static long FillWithOnes(int bits) + { + return bits == 64 ? -1L : (1L << bits) - 1; + } + + public static int HighestBitSet(int value) + { + return 31 - BitOperations.LeadingZeroCount((uint)value); + } + + public static int HighestBitSetNibble(int value) + { + return HbsNibbleLut[value]; + } + + public static long Replicate(long bits, int size) + { + long output = 0; + + for (int bit = 0; bit < 64; bit += size) + { + output |= bits << bit; + } + + return output; + } + + public static int RotateRight(int bits, int shift, int size) + { + return (int)RotateRight((uint)bits, shift, size); + } + + public static uint RotateRight(uint bits, int shift, int size) + { + return (bits >> shift) | (bits << (size - shift)); + } + + public static long RotateRight(long bits, int shift, int size) + { + return (long)RotateRight((ulong)bits, shift, size); + } + + public static ulong RotateRight(ulong bits, int shift, int size) + { + return (bits >> shift) | (bits << (size - shift)); + } + } +} diff --git a/src/ARMeilleure/Common/Counter.cs b/src/ARMeilleure/Common/Counter.cs new file mode 100644 index 00000000..d7210d15 --- /dev/null +++ b/src/ARMeilleure/Common/Counter.cs @@ -0,0 +1,98 @@ +using System; + +namespace ARMeilleure.Common +{ + /// <summary> + /// Represents a numeric counter which can be used for instrumentation of compiled code. + /// </summary> + /// <typeparam name="T">Type of the counter</typeparam> + class Counter<T> : IDisposable where T : unmanaged + { + private bool _disposed; + /// <summary> + /// Index in the <see cref="EntryTable{T}"/> + /// </summary> + private readonly int _index; + private readonly EntryTable<T> _countTable; + + /// <summary> + /// Initializes a new instance of the <see cref="Counter{T}"/> class from the specified + /// <see cref="EntryTable{T}"/> instance and index. + /// </summary> + /// <param name="countTable"><see cref="EntryTable{T}"/> instance</param> + /// <exception cref="ArgumentNullException"><paramref name="countTable"/> is <see langword="null"/></exception> + /// <exception cref="ArgumentException"><typeparamref name="T"/> is unsupported</exception> + public Counter(EntryTable<T> countTable) + { + if (typeof(T) != typeof(byte) && typeof(T) != typeof(sbyte) && + typeof(T) != typeof(short) && typeof(T) != typeof(ushort) && + typeof(T) != typeof(int) && typeof(T) != typeof(uint) && + typeof(T) != typeof(long) && typeof(T) != typeof(ulong) && + typeof(T) != typeof(nint) && typeof(T) != typeof(nuint) && + typeof(T) != typeof(float) && typeof(T) != typeof(double)) + { + throw new ArgumentException("Counter does not support the specified type."); + } + + _countTable = countTable ?? throw new ArgumentNullException(nameof(countTable)); + _index = countTable.Allocate(); + } + + /// <summary> + /// Gets a reference to the value of the counter. + /// </summary> + /// <exception cref="ObjectDisposedException"><see cref="Counter{T}"/> instance was disposed</exception> + /// <remarks> + /// This can refer to freed memory if the owning <see cref="EntryTable{TEntry}"/> is disposed. 
+ /// </remarks> + public ref T Value + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + return ref _countTable.GetValue(_index); + } + } + + /// <summary> + /// Releases all resources used by the <see cref="Counter{T}"/> instance. + /// </summary> + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + /// <summary> + /// Releases all unmanaged and optionally managed resources used by the <see cref="Counter{T}"/> instance. + /// </summary> + /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param> + protected virtual void Dispose(bool disposing) + { + if (!_disposed) + { + try + { + // The index into the EntryTable is essentially an unmanaged resource since we allocate and free the + // resource ourselves. + _countTable.Free(_index); + } + catch (ObjectDisposedException) + { + // Can happen because _countTable may be disposed before the Counter instance. + } + + _disposed = true; + } + } + + /// <summary> + /// Frees resources used by the <see cref="Counter{T}"/> instance. + /// </summary> + ~Counter() + { + Dispose(false); + } + } +} diff --git a/src/ARMeilleure/Common/EntryTable.cs b/src/ARMeilleure/Common/EntryTable.cs new file mode 100644 index 00000000..6f205797 --- /dev/null +++ b/src/ARMeilleure/Common/EntryTable.cs @@ -0,0 +1,188 @@ +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace ARMeilleure.Common +{ + /// <summary> + /// Represents an expandable table of the type <typeparamref name="TEntry"/>, whose entries will remain at the same + /// address through out the table's lifetime. + /// </summary> + /// <typeparam name="TEntry">Type of the entry in the table</typeparam> + class EntryTable<TEntry> : IDisposable where TEntry : unmanaged + { + private bool _disposed; + private int _freeHint; + private readonly int _pageCapacity; // Number of entries per page. + private readonly int _pageLogCapacity; + private readonly Dictionary<int, IntPtr> _pages; + private readonly BitMap _allocated; + + /// <summary> + /// Initializes a new instance of the <see cref="EntryTable{TEntry}"/> class with the desired page size in + /// bytes. + /// </summary> + /// <param name="pageSize">Desired page size in bytes</param> + /// <exception cref="ArgumentOutOfRangeException"><paramref name="pageSize"/> is less than 0</exception> + /// <exception cref="ArgumentException"><typeparamref name="TEntry"/>'s size is zero</exception> + /// <remarks> + /// The actual page size may be smaller or larger depending on the size of <typeparamref name="TEntry"/>. + /// </remarks> + public unsafe EntryTable(int pageSize = 4096) + { + if (pageSize < 0) + { + throw new ArgumentOutOfRangeException(nameof(pageSize), "Page size cannot be negative."); + } + + if (sizeof(TEntry) == 0) + { + throw new ArgumentException("Size of TEntry cannot be zero."); + } + + _allocated = new BitMap(NativeAllocator.Instance); + _pages = new Dictionary<int, IntPtr>(); + _pageLogCapacity = BitOperations.Log2((uint)(pageSize / sizeof(TEntry))); + _pageCapacity = 1 << _pageLogCapacity; + } + + /// <summary> + /// Allocates an entry in the <see cref="EntryTable{TEntry}"/>. 
+ /// </summary> + /// <returns>Index of entry allocated in the table</returns> + /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception> + public int Allocate() + { + ObjectDisposedException.ThrowIf(_disposed, this); + + lock (_allocated) + { + if (_allocated.IsSet(_freeHint)) + { + _freeHint = _allocated.FindFirstUnset(); + } + + int index = _freeHint++; + var page = GetPage(index); + + _allocated.Set(index); + + GetValue(page, index) = default; + + return index; + } + } + + /// <summary> + /// Frees the entry at the specified <paramref name="index"/>. + /// </summary> + /// <param name="index">Index of entry to free</param> + /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception> + public void Free(int index) + { + ObjectDisposedException.ThrowIf(_disposed, this); + + lock (_allocated) + { + if (_allocated.IsSet(index)) + { + _allocated.Clear(index); + + _freeHint = index; + } + } + } + + /// <summary> + /// Gets a reference to the entry at the specified allocated <paramref name="index"/>. + /// </summary> + /// <param name="index">Index of the entry</param> + /// <returns>Reference to the entry at the specified <paramref name="index"/></returns> + /// <exception cref="ObjectDisposedException"><see cref="EntryTable{TEntry}"/> instance was disposed</exception> + /// <exception cref="ArgumentException">Entry at <paramref name="index"/> is not allocated</exception> + public ref TEntry GetValue(int index) + { + ObjectDisposedException.ThrowIf(_disposed, this); + + lock (_allocated) + { + if (!_allocated.IsSet(index)) + { + throw new ArgumentException("Entry at the specified index was not allocated", nameof(index)); + } + + var page = GetPage(index); + + return ref GetValue(page, index); + } + } + + /// <summary> + /// Gets a reference to the entry using the specified <paramref name="index"/> from the specified + /// <paramref name="page"/>. + /// </summary> + /// <param name="page">Page to use</param> + /// <param name="index">Index to use</param> + /// <returns>Reference to the entry</returns> + private ref TEntry GetValue(Span<TEntry> page, int index) + { + return ref page[index & (_pageCapacity - 1)]; + } + + /// <summary> + /// Gets the page for the specified <paramref name="index"/>. + /// </summary> + /// <param name="index">Index to use</param> + /// <returns>Page for the specified <paramref name="index"/></returns> + private unsafe Span<TEntry> GetPage(int index) + { + var pageIndex = (int)((uint)(index & ~(_pageCapacity - 1)) >> _pageLogCapacity); + + if (!_pages.TryGetValue(pageIndex, out IntPtr page)) + { + page = (IntPtr)NativeAllocator.Instance.Allocate((uint)sizeof(TEntry) * (uint)_pageCapacity); + + _pages.Add(pageIndex, page); + } + + return new Span<TEntry>((void*)page, _pageCapacity); + } + + /// <summary> + /// Releases all resources used by the <see cref="EntryTable{TEntry}"/> instance. + /// </summary> + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + /// <summary> + /// Releases all unmanaged and optionally managed resources used by the <see cref="EntryTable{TEntry}"/> + /// instance.
+ /// </summary> + /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param> + protected unsafe virtual void Dispose(bool disposing) + { + if (!_disposed) + { + _allocated.Dispose(); + + foreach (var page in _pages.Values) + { + NativeAllocator.Instance.Free((void*)page); + } + + _disposed = true; + } + } + + /// <summary> + /// Frees resources used by the <see cref="EntryTable{TEntry}"/> instance. + /// </summary> + ~EntryTable() + { + Dispose(false); + } + } +} diff --git a/src/ARMeilleure/Common/EnumUtils.cs b/src/ARMeilleure/Common/EnumUtils.cs new file mode 100644 index 00000000..2a4aa645 --- /dev/null +++ b/src/ARMeilleure/Common/EnumUtils.cs @@ -0,0 +1,12 @@ +using System; + +namespace ARMeilleure.Common +{ + static class EnumUtils + { + public static int GetCount(Type enumType) + { + return Enum.GetNames(enumType).Length; + } + } +} diff --git a/src/ARMeilleure/Common/NativeAllocator.cs b/src/ARMeilleure/Common/NativeAllocator.cs new file mode 100644 index 00000000..71c04a9b --- /dev/null +++ b/src/ARMeilleure/Common/NativeAllocator.cs @@ -0,0 +1,27 @@ +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Common +{ + unsafe sealed class NativeAllocator : Allocator + { + public static NativeAllocator Instance { get; } = new(); + + public override void* Allocate(ulong size) + { + void* result = (void*)Marshal.AllocHGlobal((IntPtr)size); + + if (result == null) + { + throw new OutOfMemoryException(); + } + + return result; + } + + public override void Free(void* block) + { + Marshal.FreeHGlobal((IntPtr)block); + } + } +} diff --git a/src/ARMeilleure/Decoders/Block.cs b/src/ARMeilleure/Decoders/Block.cs new file mode 100644 index 00000000..f296d299 --- /dev/null +++ b/src/ARMeilleure/Decoders/Block.cs @@ -0,0 +1,101 @@ +using System; +using System.Collections.Generic; + +namespace ARMeilleure.Decoders +{ + class Block + { + public ulong Address { get; set; } + public ulong EndAddress { get; set; } + + public Block Next { get; set; } + public Block Branch { get; set; } + + public bool Exit { get; set; } + + public List<OpCode> OpCodes { get; } + + public Block() + { + OpCodes = new List<OpCode>(); + } + + public Block(ulong address) : this() + { + Address = address; + } + + public void Split(Block rightBlock) + { + int splitIndex = BinarySearch(OpCodes, rightBlock.Address); + + if (OpCodes[splitIndex].Address < rightBlock.Address) + { + splitIndex++; + } + + int splitCount = OpCodes.Count - splitIndex; + + if (splitCount <= 0) + { + throw new ArgumentException("Can't split at right block address."); + } + + rightBlock.EndAddress = EndAddress; + + rightBlock.Next = Next; + rightBlock.Branch = Branch; + + rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount)); + + EndAddress = rightBlock.Address; + + Next = rightBlock; + Branch = null; + + OpCodes.RemoveRange(splitIndex, splitCount); + } + + private static int BinarySearch(List<OpCode> opCodes, ulong address) + { + int left = 0; + int middle = 0; + int right = opCodes.Count - 1; + + while (left <= right) + { + int size = right - left; + + middle = left + (size >> 1); + + OpCode opCode = opCodes[middle]; + + if (address == (ulong)opCode.Address) + { + break; + } + + if (address < (ulong)opCode.Address) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return middle; + } + + public OpCode GetLastOp() + { + if (OpCodes.Count > 0) + { + return OpCodes[OpCodes.Count - 1]; + } + + return null; + } + } +}
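EntryTable above never moves an entry once allocated because an index maps to a fixed page and slot through power-of-two masks and shifts; GetPage's masked shift is equivalent to a plain right shift for non-negative indices. A minimal sketch of that decomposition (Locate is a hypothetical helper):

    using System;

    static class PageMathDemo
    {
        // Splits an entry index into a page number and an in-page slot,
        // assuming a power-of-two page capacity of 1 << pageLogCapacity.
        static (int Page, int Slot) Locate(int index, int pageLogCapacity)
        {
            int pageCapacity = 1 << pageLogCapacity;

            return (index >> pageLogCapacity, index & (pageCapacity - 1));
        }

        static void Main()
        {
            // 4096-byte pages of 8-byte entries give 512 slots, log2 = 9.
            Console.WriteLine(Locate(1000, 9)); // (1, 488)
        }
    }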
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/Condition.cs b/src/ARMeilleure/Decoders/Condition.cs new file mode 100644 index 00000000..727f897d --- /dev/null +++ b/src/ARMeilleure/Decoders/Condition.cs @@ -0,0 +1,32 @@ +namespace ARMeilleure.Decoders +{ + enum Condition + { + Eq = 0, + Ne = 1, + GeUn = 2, + LtUn = 3, + Mi = 4, + Pl = 5, + Vs = 6, + Vc = 7, + GtUn = 8, + LeUn = 9, + Ge = 10, + Lt = 11, + Gt = 12, + Le = 13, + Al = 14, + Nv = 15 + } + + static class ConditionExtensions + { + public static Condition Invert(this Condition cond) + { + // Bit 0 of all conditions is basically a negation bit, so + // inverting this bit has the effect of inverting the condition. + return (Condition)((int)cond ^ 1); + } + } +}
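The Invert extension above works because the AArch64 condition encoding stores each condition and its negation as an even/odd pair, so XOR-ing bit 0 flips the test. A quick check of a few pairs from the enum above:

    using System;

    static class ConditionInvertDemo
    {
        // Mirrors ConditionExtensions.Invert: bit 0 acts as the negation bit.
        static int Invert(int cond) => cond ^ 1;

        static void Main()
        {
            Console.WriteLine(Invert(0));  // Eq (0)  -> Ne (1)
            Console.WriteLine(Invert(10)); // Ge (10) -> Lt (11)
            Console.WriteLine(Invert(13)); // Le (13) -> Gt (12)
        }
    }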
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/DataOp.cs b/src/ARMeilleure/Decoders/DataOp.cs new file mode 100644 index 00000000..464d0089 --- /dev/null +++ b/src/ARMeilleure/Decoders/DataOp.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + enum DataOp + { + Adr = 0, + Arithmetic = 1, + Logical = 2, + BitField = 3 + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/Decoder.cs b/src/ARMeilleure/Decoders/Decoder.cs new file mode 100644 index 00000000..426465aa --- /dev/null +++ b/src/ARMeilleure/Decoders/Decoder.cs @@ -0,0 +1,391 @@ +using ARMeilleure.Decoders.Optimizations; +using ARMeilleure.Instructions; +using ARMeilleure.Memory; +using ARMeilleure.State; +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace ARMeilleure.Decoders +{ + static class Decoder + { + // We define a limit on the number of instructions that a function may have, + // this prevents functions being potentially too large, which would + // take too long to compile and use too much memory. + private const int MaxInstsPerFunction = 2500; + + // For lower code quality translation, we set a lower limit since we're blocking execution. + private const int MaxInstsPerFunctionLowCq = 500; + + public static Block[] Decode(IMemoryManager memory, ulong address, ExecutionMode mode, bool highCq, DecoderMode dMode) + { + List<Block> blocks = new List<Block>(); + + Queue<Block> workQueue = new Queue<Block>(); + + Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>(); + + Debug.Assert(MaxInstsPerFunctionLowCq <= MaxInstsPerFunction); + + int opsCount = 0; + + int instructionLimit = highCq ? MaxInstsPerFunction : MaxInstsPerFunctionLowCq; + + Block GetBlock(ulong blkAddress) + { + if (!visited.TryGetValue(blkAddress, out Block block)) + { + block = new Block(blkAddress); + + if ((dMode != DecoderMode.MultipleBlocks && visited.Count >= 1) || opsCount > instructionLimit || !memory.IsMapped(blkAddress)) + { + block.Exit = true; + block.EndAddress = blkAddress; + } + + workQueue.Enqueue(block); + + visited.Add(blkAddress, block); + } + + return block; + } + + GetBlock(address); + + while (workQueue.TryDequeue(out Block currBlock)) + { + // Check if the current block is inside another block. + if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex)) + { + Block nBlock = blocks[nBlkIndex]; + + if (nBlock.Address == currBlock.Address) + { + throw new InvalidOperationException("Found duplicate block address on the list."); + } + + currBlock.Exit = false; + + nBlock.Split(currBlock); + + blocks.Insert(nBlkIndex + 1, currBlock); + + continue; + } + + if (!currBlock.Exit) + { + // If we have a block after the current one, set the limit address. + ulong limitAddress = ulong.MaxValue; + + if (nBlkIndex != blocks.Count) + { + Block nBlock = blocks[nBlkIndex]; + + int nextIndex = nBlkIndex + 1; + + if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count) + { + limitAddress = blocks[nextIndex].Address; + } + else if (nBlock.Address > currBlock.Address) + { + limitAddress = blocks[nBlkIndex].Address; + } + } + + if (dMode == DecoderMode.SingleInstruction) + { + // Only read at most one instruction + limitAddress = currBlock.Address + 1; + } + + FillBlock(memory, mode, currBlock, limitAddress); + + opsCount += currBlock.OpCodes.Count; + + if (currBlock.OpCodes.Count != 0) + { + // Set child blocks. "Branch" is the block the branch instruction + // points to (when taken), "Next" is the block at the next address, + // executed when the branch is not taken. For Unconditional Branches + // (except BL/BLR that are sub calls) or end of executable, Next is null. 
+ OpCode lastOp = currBlock.GetLastOp(); + + bool isCall = IsCall(lastOp); + + if (lastOp is IOpCodeBImm op && !isCall) + { + currBlock.Branch = GetBlock((ulong)op.Immediate); + } + + if (isCall || !(IsUnconditionalBranch(lastOp) || IsTrap(lastOp))) + { + currBlock.Next = GetBlock(currBlock.EndAddress); + } + } + } + + // Insert the new block on the list (sorted by address). + if (blocks.Count != 0) + { + Block nBlock = blocks[nBlkIndex]; + + blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock); + } + else + { + blocks.Add(currBlock); + } + } + + if (blocks.Count == 1 && blocks[0].OpCodes.Count == 0) + { + Debug.Assert(blocks[0].Exit); + Debug.Assert(blocks[0].Address == blocks[0].EndAddress); + + throw new InvalidOperationException($"Decoded a single empty exit block. Entry point = 0x{address:X}."); + } + + if (dMode == DecoderMode.MultipleBlocks) + { + return TailCallRemover.RunPass(address, blocks); + } + else + { + return blocks.ToArray(); + } + } + + public static bool BinarySearch(List<Block> blocks, ulong address, out int index) + { + index = 0; + + int left = 0; + int right = blocks.Count - 1; + + while (left <= right) + { + int size = right - left; + + int middle = left + (size >> 1); + + Block block = blocks[middle]; + + index = middle; + + if (address >= block.Address && address < block.EndAddress) + { + return true; + } + + if (address < block.Address) + { + right = middle - 1; + } + else + { + left = middle + 1; + } + } + + return false; + } + + private static void FillBlock( + IMemoryManager memory, + ExecutionMode mode, + Block block, + ulong limitAddress) + { + ulong address = block.Address; + int itBlockSize = 0; + + OpCode opCode; + + do + { + if (address >= limitAddress && itBlockSize == 0) + { + break; + } + + opCode = DecodeOpCode(memory, address, mode); + + block.OpCodes.Add(opCode); + + address += (ulong)opCode.OpCodeSizeInBytes; + + if (opCode is OpCodeT16IfThen it) + { + itBlockSize = it.IfThenBlockSize; + } + else if (itBlockSize > 0) + { + itBlockSize--; + } + } + while (!(IsBranch(opCode) || IsException(opCode))); + + block.EndAddress = address; + } + + private static bool IsBranch(OpCode opCode) + { + return opCode is OpCodeBImm || + opCode is OpCodeBReg || IsAarch32Branch(opCode); + } + + private static bool IsUnconditionalBranch(OpCode opCode) + { + return opCode is OpCodeBImmAl || + opCode is OpCodeBReg || IsAarch32UnconditionalBranch(opCode); + } + + private static bool IsAarch32UnconditionalBranch(OpCode opCode) + { + if (!(opCode is OpCode32 op)) + { + return false; + } + + // Compare and branch instructions are always conditional. + if (opCode.Instruction.Name == InstName.Cbz || + opCode.Instruction.Name == InstName.Cbnz) + { + return false; + } + + // Note: On ARM32, most instructions have conditional execution, + // so there's no "Always" (unconditional) branch like on ARM64. + // We need to check if the condition is "Always" instead. + return IsAarch32Branch(op) && op.Cond >= Condition.Al; + } + + private static bool IsAarch32Branch(OpCode opCode) + { + // Note: On ARM32, most ALU operations can write to R15 (PC), + // so we must consider such operations as potential branches as well.
+            if (opCode is IOpCode32Alu opAlu && opAlu.Rd == RegisterAlias.Aarch32Pc)
+            {
+                if (opCode is OpCodeT32)
+                {
+                    return opCode.Instruction.Name != InstName.Tst && opCode.Instruction.Name != InstName.Teq &&
+                           opCode.Instruction.Name != InstName.Cmp && opCode.Instruction.Name != InstName.Cmn;
+                }
+                return true;
+            }
+
+            // Same thing for memory operations. We have the cases where PC is a target
+            // register (Rt == 15 or (mask & (1 << 15)) != 0), and cases where there is
+            // a write back to PC (wback == true && Rn == 15); however, the latter may
+            // be "undefined" depending on the CPU, so compilers should not produce that.
+            if (opCode is IOpCode32Mem || opCode is IOpCode32MemMult)
+            {
+                int rt, rn;
+
+                bool wBack, isLoad;
+
+                if (opCode is IOpCode32Mem opMem)
+                {
+                    rt = opMem.Rt;
+                    rn = opMem.Rn;
+                    wBack = opMem.WBack;
+                    isLoad = opMem.IsLoad;
+
+                    // For the dual load, we also need to take into account the
+                    // case where Rt2 == 15 (PC).
+                    if (rt == 14 && opMem.Instruction.Name == InstName.Ldrd)
+                    {
+                        rt = RegisterAlias.Aarch32Pc;
+                    }
+                }
+                else if (opCode is IOpCode32MemMult opMemMult)
+                {
+                    const int pcMask = 1 << RegisterAlias.Aarch32Pc;
+
+                    rt = (opMemMult.RegisterMask & pcMask) != 0 ? RegisterAlias.Aarch32Pc : 0;
+                    rn = opMemMult.Rn;
+                    wBack = opMemMult.PostOffset != 0;
+                    isLoad = opMemMult.IsLoad;
+                }
+                else
+                {
+                    throw new NotImplementedException($"The type \"{opCode.GetType().Name}\" is not implemented on the decoder.");
+                }
+
+                if ((rt == RegisterAlias.Aarch32Pc && isLoad) ||
+                    (rn == RegisterAlias.Aarch32Pc && wBack))
+                {
+                    return true;
+                }
+            }
+
+            // Explicit branch instructions.
+            return opCode is IOpCode32BImm ||
+                   opCode is IOpCode32BReg;
+        }
+
+        private static bool IsCall(OpCode opCode)
+        {
+            return opCode.Instruction.Name == InstName.Bl ||
+                   opCode.Instruction.Name == InstName.Blr ||
+                   opCode.Instruction.Name == InstName.Blx;
+        }
+
+        private static bool IsException(OpCode opCode)
+        {
+            return IsTrap(opCode) || opCode.Instruction.Name == InstName.Svc;
+        }
+
+        private static bool IsTrap(OpCode opCode)
+        {
+            return opCode.Instruction.Name == InstName.Brk ||
+                   opCode.Instruction.Name == InstName.Trap ||
+                   opCode.Instruction.Name == InstName.Und;
+        }
+
+        public static OpCode DecodeOpCode(IMemoryManager memory, ulong address, ExecutionMode mode)
+        {
+            int opCode = memory.Read<int>(address);
+
+            InstDescriptor inst;
+
+            OpCodeTable.MakeOp makeOp;
+
+            if (mode == ExecutionMode.Aarch64)
+            {
+                (inst, makeOp) = OpCodeTable.GetInstA64(opCode);
+            }
+            else
+            {
+                if (mode == ExecutionMode.Aarch32Arm)
+                {
+                    (inst, makeOp) = OpCodeTable.GetInstA32(opCode);
+                }
+                else /* if (mode == ExecutionMode.Aarch32Thumb) */
+                {
+                    (inst, makeOp) = OpCodeTable.GetInstT32(opCode);
+                }
+            }
+
+            if (makeOp != null)
+            {
+                return makeOp(inst, address, opCode);
+            }
+            else
+            {
+                if (mode == ExecutionMode.Aarch32Thumb)
+                {
+                    return new OpCodeT16(inst, address, opCode);
+                }
+                else
+                {
+                    return new OpCode(inst, address, opCode);
+                }
+            }
+        }
+    }
+}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/DecoderHelper.cs b/src/ARMeilleure/Decoders/DecoderHelper.cs new file mode 100644 index 00000000..5227e6a1 --- /dev/null +++ b/src/ARMeilleure/Decoders/DecoderHelper.cs @@ -0,0 +1,167 @@ +using ARMeilleure.Common; + +namespace ARMeilleure.Decoders +{ + static class DecoderHelper + { + static DecoderHelper() + { + Imm8ToFP32Table = BuildImm8ToFP32Table(); + Imm8ToFP64Table = BuildImm8ToFP64Table(); + } + + public static readonly uint[] Imm8ToFP32Table; + public static readonly ulong[] Imm8ToFP64Table; + + private static uint[] BuildImm8ToFP32Table() + { + uint[] tbl = new uint[256]; + + for (int idx = 0; idx < tbl.Length; idx++) + { + tbl[idx] = ExpandImm8ToFP32((uint)idx); + } + + return tbl; + } + + private static ulong[] BuildImm8ToFP64Table() + { + ulong[] tbl = new ulong[256]; + + for (int idx = 0; idx < tbl.Length; idx++) + { + tbl[idx] = ExpandImm8ToFP64((ulong)idx); + } + + return tbl; + } + + // abcdefgh -> aBbbbbbc defgh000 00000000 00000000 (B = ~b) + private static uint ExpandImm8ToFP32(uint imm) + { + uint MoveBit(uint bits, int from, int to) + { + return ((bits >> from) & 1U) << to; + } + + return MoveBit(imm, 7, 31) | MoveBit(~imm, 6, 30) | + MoveBit(imm, 6, 29) | MoveBit( imm, 6, 28) | + MoveBit(imm, 6, 27) | MoveBit( imm, 6, 26) | + MoveBit(imm, 6, 25) | MoveBit( imm, 5, 24) | + MoveBit(imm, 4, 23) | MoveBit( imm, 3, 22) | + MoveBit(imm, 2, 21) | MoveBit( imm, 1, 20) | + MoveBit(imm, 0, 19); + } + + // abcdefgh -> aBbbbbbb bbcdefgh 00000000 00000000 00000000 00000000 00000000 00000000 (B = ~b) + private static ulong ExpandImm8ToFP64(ulong imm) + { + ulong MoveBit(ulong bits, int from, int to) + { + return ((bits >> from) & 1UL) << to; + } + + return MoveBit(imm, 7, 63) | MoveBit(~imm, 6, 62) | + MoveBit(imm, 6, 61) | MoveBit( imm, 6, 60) | + MoveBit(imm, 6, 59) | MoveBit( imm, 6, 58) | + MoveBit(imm, 6, 57) | MoveBit( imm, 6, 56) | + MoveBit(imm, 6, 55) | MoveBit( imm, 6, 54) | + MoveBit(imm, 5, 53) | MoveBit( imm, 4, 52) | + MoveBit(imm, 3, 51) | MoveBit( imm, 2, 50) | + MoveBit(imm, 1, 49) | MoveBit( imm, 0, 48); + } + + public struct BitMask + { + public long WMask; + public long TMask; + public int Pos; + public int Shift; + public bool IsUndefined; + + public static BitMask Invalid => new BitMask { IsUndefined = true }; + } + + public static BitMask DecodeBitMask(int opCode, bool immediate) + { + int immS = (opCode >> 10) & 0x3f; + int immR = (opCode >> 16) & 0x3f; + + int n = (opCode >> 22) & 1; + int sf = (opCode >> 31) & 1; + + int length = BitUtils.HighestBitSet((~immS & 0x3f) | (n << 6)); + + if (length < 1 || (sf == 0 && n != 0)) + { + return BitMask.Invalid; + } + + int size = 1 << length; + + int levels = size - 1; + + int s = immS & levels; + int r = immR & levels; + + if (immediate && s == levels) + { + return BitMask.Invalid; + } + + long wMask = BitUtils.FillWithOnes(s + 1); + long tMask = BitUtils.FillWithOnes(((s - r) & levels) + 1); + + if (r > 0) + { + wMask = BitUtils.RotateRight(wMask, r, size); + wMask &= BitUtils.FillWithOnes(size); + } + + return new BitMask() + { + WMask = BitUtils.Replicate(wMask, size), + TMask = BitUtils.Replicate(tMask, size), + + Pos = immS, + Shift = immR + }; + } + + public static long DecodeImm24_2(int opCode) + { + return ((long)opCode << 40) >> 38; + } + + public static long DecodeImm26_2(int opCode) + { + return ((long)opCode << 38) >> 36; + } + + public static long DecodeImmS19_2(int opCode) + { + return (((long)opCode << 40) >> 43) & ~3; + } + + public 
static long DecodeImmS14_2(int opCode) + { + return (((long)opCode << 45) >> 48) & ~3; + } + + public static bool VectorArgumentsInvalid(bool q, params int[] args) + { + if (q) + { + for (int i = 0; i < args.Length; i++) + { + if ((args[i] & 1) == 1) + { + return true; + } + } + } + return false; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/DecoderMode.cs b/src/ARMeilleure/Decoders/DecoderMode.cs new file mode 100644 index 00000000..55362084 --- /dev/null +++ b/src/ARMeilleure/Decoders/DecoderMode.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + enum DecoderMode + { + MultipleBlocks, + SingleBlock, + SingleInstruction, + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode.cs b/src/ARMeilleure/Decoders/IOpCode.cs new file mode 100644 index 00000000..37ba7a4c --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode.cs @@ -0,0 +1,17 @@ +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.Decoders +{ + interface IOpCode + { + ulong Address { get; } + + InstDescriptor Instruction { get; } + + RegisterSize RegisterSize { get; } + + int GetBitsCount(); + + OperandType GetOperandType(); + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32.cs b/src/ARMeilleure/Decoders/IOpCode32.cs new file mode 100644 index 00000000..126c1069 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32 : IOpCode + { + Condition Cond { get; } + + uint GetPc(); + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32Adr.cs b/src/ARMeilleure/Decoders/IOpCode32Adr.cs new file mode 100644 index 00000000..40a4f526 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32Adr.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32Adr + { + int Rd { get; } + + int Immediate { get; } + } +} diff --git a/src/ARMeilleure/Decoders/IOpCode32Alu.cs b/src/ARMeilleure/Decoders/IOpCode32Alu.cs new file mode 100644 index 00000000..69fee164 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32Alu.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32Alu : IOpCode32, IOpCode32HasSetFlags + { + int Rd { get; } + int Rn { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32AluBf.cs b/src/ARMeilleure/Decoders/IOpCode32AluBf.cs new file mode 100644 index 00000000..206c2965 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluBf.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluBf + { + int Rd { get; } + int Rn { get; } + + int Msb { get; } + int Lsb { get; } + + int SourceMask => (int)(0xFFFFFFFF >> (31 - Msb)); + int DestMask => SourceMask & (int)(0xFFFFFFFF << Lsb); + } +} diff --git a/src/ARMeilleure/Decoders/IOpCode32AluImm.cs b/src/ARMeilleure/Decoders/IOpCode32AluImm.cs new file mode 100644 index 00000000..342fb8f6 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluImm.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluImm : IOpCode32Alu + { + int Immediate { get; } + + bool IsRotated { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs b/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs new file mode 100644 index 00000000..cd128f65 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluImm16.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluImm16 : IOpCode32Alu + { + int Immediate { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32AluMla.cs b/src/ARMeilleure/Decoders/IOpCode32AluMla.cs new file mode 100644 index 00000000..79b16425 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluMla.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluMla : IOpCode32AluReg + { + int Ra { get; } + + bool NHigh { get; } + bool MHigh { get; } + bool R { get; } + } +} diff --git a/src/ARMeilleure/Decoders/IOpCode32AluReg.cs b/src/ARMeilleure/Decoders/IOpCode32AluReg.cs new file mode 100644 index 00000000..1612cc5c --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluReg.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluReg : IOpCode32Alu + { + int Rm { get; } + } +} diff --git a/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs b/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs new file mode 100644 index 00000000..e899a659 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluRsImm.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluRsImm : IOpCode32Alu + { + int Rm { get; } + int Immediate { get; } + + ShiftType ShiftType { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs b/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs new file mode 100644 index 00000000..879db059 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluRsReg.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluRsReg : IOpCode32Alu + { + int Rm { get; } + int Rs { get; } + + ShiftType ShiftType { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs b/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs new file mode 100644 index 00000000..79d2bb9b --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluUmull.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluUmull : IOpCode32, IOpCode32HasSetFlags + { + int RdLo { get; } + int RdHi { get; } + int Rn { get; } + int Rm { get; } + + bool NHigh { get; } + bool MHigh { get; } + } +} diff --git a/src/ARMeilleure/Decoders/IOpCode32AluUx.cs b/src/ARMeilleure/Decoders/IOpCode32AluUx.cs new file mode 100644 index 00000000..d03c7e21 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32AluUx.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32AluUx : IOpCode32AluReg + { + int RotateBits { get; } + bool Add { get; } + } +} diff --git a/src/ARMeilleure/Decoders/IOpCode32BImm.cs b/src/ARMeilleure/Decoders/IOpCode32BImm.cs new file mode 100644 index 00000000..ec7db2c2 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32BImm.cs @@ -0,0 +1,4 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32BImm : IOpCode32, IOpCodeBImm { } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32BReg.cs b/src/ARMeilleure/Decoders/IOpCode32BReg.cs new file mode 100644 index 00000000..097ab427 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32BReg.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32BReg : IOpCode32 + { + int Rm { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32Exception.cs b/src/ARMeilleure/Decoders/IOpCode32Exception.cs new file mode 100644 index 00000000..8f0fb81a --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32Exception.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32Exception + { + int Id { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs b/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs new file mode 100644 index 00000000..71ca6d19 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32HasSetFlags.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32HasSetFlags + { + bool? SetFlags { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32Mem.cs b/src/ARMeilleure/Decoders/IOpCode32Mem.cs new file mode 100644 index 00000000..6664ddff --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32Mem.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32Mem : IOpCode32 + { + int Rt { get; } + int Rt2 => Rt | 1; + int Rn { get; } + + bool WBack { get; } + bool IsLoad { get; } + bool Index { get; } + bool Add { get; } + + int Immediate { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32MemEx.cs b/src/ARMeilleure/Decoders/IOpCode32MemEx.cs new file mode 100644 index 00000000..aca7200a --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32MemEx.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32MemEx : IOpCode32Mem + { + int Rd { get; } + } +} diff --git a/src/ARMeilleure/Decoders/IOpCode32MemMult.cs b/src/ARMeilleure/Decoders/IOpCode32MemMult.cs new file mode 100644 index 00000000..4b891bc1 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32MemMult.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32MemMult : IOpCode32 + { + int Rn { get; } + + int RegisterMask { get; } + + int PostOffset { get; } + + bool IsLoad { get; } + + int Offset { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32MemReg.cs b/src/ARMeilleure/Decoders/IOpCode32MemReg.cs new file mode 100644 index 00000000..7fe1b022 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32MemReg.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32MemReg : IOpCode32Mem + { + int Rm { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs b/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs new file mode 100644 index 00000000..65b7ee0b --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32MemRsImm.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32MemRsImm : IOpCode32Mem + { + int Rm { get; } + ShiftType ShiftType { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCode32Simd.cs b/src/ARMeilleure/Decoders/IOpCode32Simd.cs new file mode 100644 index 00000000..687254d9 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32Simd.cs @@ -0,0 +1,4 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32Simd : IOpCode32, IOpCodeSimd { } +} diff --git a/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs b/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs new file mode 100644 index 00000000..a0cb669c --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCode32SimdImm.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCode32SimdImm : IOpCode32Simd + { + int Vd { get; } + long Immediate { get; } + int Elems { get; } + } +} diff --git a/src/ARMeilleure/Decoders/IOpCodeAlu.cs b/src/ARMeilleure/Decoders/IOpCodeAlu.cs new file mode 100644 index 00000000..b8c28513 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCodeAlu.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeAlu : IOpCode + { + int Rd { get; } + int Rn { get; } + + DataOp DataOp { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCodeAluImm.cs b/src/ARMeilleure/Decoders/IOpCodeAluImm.cs new file mode 100644 index 00000000..02f4c997 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCodeAluImm.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeAluImm : IOpCodeAlu + { + long Immediate { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCodeAluRs.cs b/src/ARMeilleure/Decoders/IOpCodeAluRs.cs new file mode 100644 index 00000000..22540b11 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCodeAluRs.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeAluRs : IOpCodeAlu + { + int Shift { get; } + int Rm { get; } + + ShiftType ShiftType { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCodeAluRx.cs b/src/ARMeilleure/Decoders/IOpCodeAluRx.cs new file mode 100644 index 00000000..9d16be78 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCodeAluRx.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeAluRx : IOpCodeAlu + { + int Shift { get; } + int Rm { get; } + + IntType IntType { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCodeBImm.cs b/src/ARMeilleure/Decoders/IOpCodeBImm.cs new file mode 100644 index 00000000..958bff28 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCodeBImm.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeBImm : IOpCode + { + long Immediate { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCodeCond.cs b/src/ARMeilleure/Decoders/IOpCodeCond.cs new file mode 100644 index 00000000..9808f7c0 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCodeCond.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeCond : IOpCode + { + Condition Cond { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCodeLit.cs b/src/ARMeilleure/Decoders/IOpCodeLit.cs new file mode 100644 index 00000000..74084a45 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCodeLit.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeLit : IOpCode + { + int Rt { get; } + long Immediate { get; } + int Size { get; } + bool Signed { get; } + bool Prefetch { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IOpCodeSimd.cs b/src/ARMeilleure/Decoders/IOpCodeSimd.cs new file mode 100644 index 00000000..056ef045 --- /dev/null +++ b/src/ARMeilleure/Decoders/IOpCodeSimd.cs @@ -0,0 +1,7 @@ +namespace ARMeilleure.Decoders +{ + interface IOpCodeSimd : IOpCode + { + int Size { get; } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/InstDescriptor.cs b/src/ARMeilleure/Decoders/InstDescriptor.cs new file mode 100644 index 00000000..577ff394 --- /dev/null +++ b/src/ARMeilleure/Decoders/InstDescriptor.cs @@ -0,0 +1,18 @@ +using ARMeilleure.Instructions; + +namespace ARMeilleure.Decoders +{ + readonly struct InstDescriptor + { + public static InstDescriptor Undefined => new InstDescriptor(InstName.Und, InstEmit.Und); + + public InstName Name { get; } + public InstEmitter Emitter { get; } + + public InstDescriptor(InstName name, InstEmitter emitter) + { + Name = name; + Emitter = emitter; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/InstEmitter.cs b/src/ARMeilleure/Decoders/InstEmitter.cs new file mode 100644 index 00000000..a8b52656 --- /dev/null +++ b/src/ARMeilleure/Decoders/InstEmitter.cs @@ -0,0 +1,6 @@ +using ARMeilleure.Translation; + +namespace ARMeilleure.Decoders +{ + delegate void InstEmitter(ArmEmitterContext context); +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/IntType.cs b/src/ARMeilleure/Decoders/IntType.cs new file mode 100644 index 00000000..244e9680 --- /dev/null +++ b/src/ARMeilleure/Decoders/IntType.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + enum IntType + { + UInt8 = 0, + UInt16 = 1, + UInt32 = 2, + UInt64 = 3, + Int8 = 4, + Int16 = 5, + Int32 = 6, + Int64 = 7 + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode.cs b/src/ARMeilleure/Decoders/OpCode.cs new file mode 100644 index 00000000..f9aed792 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode.cs @@ -0,0 +1,49 @@ +using ARMeilleure.IntermediateRepresentation; +using System; + +namespace ARMeilleure.Decoders +{ + class OpCode : IOpCode + { + public ulong Address { get; } + public int RawOpCode { get; } + + public int OpCodeSizeInBytes { get; protected set; } = 4; + + public InstDescriptor Instruction { get; protected set; } + + public RegisterSize RegisterSize { get; protected set; } + + public static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode(inst, address, opCode); + + public OpCode(InstDescriptor inst, ulong address, int opCode) + { + Instruction = inst; + Address = address; + RawOpCode = opCode; + + RegisterSize = RegisterSize.Int64; + } + + public int GetPairsCount() => GetBitsCount() / 16; + public int GetBytesCount() => GetBitsCount() / 8; + + public int GetBitsCount() + { + switch (RegisterSize) + { + case RegisterSize.Int32: return 32; + case RegisterSize.Int64: return 64; + case RegisterSize.Simd64: return 64; + case RegisterSize.Simd128: return 128; + } + + throw new InvalidOperationException(); + } + + public OperandType GetOperandType() + { + return RegisterSize == RegisterSize.Int32 ? OperandType.I32 : OperandType.I64; + } + } +}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32.cs b/src/ARMeilleure/Decoders/OpCode32.cs
new file mode 100644
index 00000000..c2f14145
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32.cs
@@ -0,0 +1,34 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32 : OpCode
+    {
+        public Condition Cond { get; protected set; }
+
+        public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32(inst, address, opCode);
+
+        public OpCode32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode)
+        {
+            RegisterSize = RegisterSize.Int32;
+
+            Cond = (Condition)((uint)opCode >> 28);
+        }
+
+        public bool IsThumb { get; protected init; } = false;
+
+        public uint GetPc()
+        {
+            // For backwards compatibility with the legacy behavior of the ARMv4 CPU
+            // pipeline, the PC actually points 2 instructions ahead.
+            if (IsThumb)
+            {
+                // In Thumb mode, the PC is ahead by 4 whether the current instruction
+                // is 16 or 32 bit.
+                return (uint)Address + 4u;
+            }
+            else
+            {
+                return (uint)Address + 8u;
+            }
+        }
+    }
+}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32Alu.cs b/src/ARMeilleure/Decoders/OpCode32Alu.cs new file mode 100644 index 00000000..1625aee0 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32Alu.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Alu : OpCode32, IOpCode32Alu + { + public int Rd { get; } + public int Rn { get; } + + public bool? SetFlags { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Alu(inst, address, opCode); + + public OpCode32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + SetFlags = ((opCode >> 20) & 1) != 0; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32AluBf.cs b/src/ARMeilleure/Decoders/OpCode32AluBf.cs new file mode 100644 index 00000000..0cee34e6 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluBf.cs @@ -0,0 +1,22 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluBf : OpCode32, IOpCode32AluBf + { + public int Rd { get; } + public int Rn { get; } + + public int Msb { get; } + public int Lsb { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluBf(inst, address, opCode); + + public OpCode32AluBf(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 12) & 0xf; + Rn = (opCode >> 0) & 0xf; + + Msb = (opCode >> 16) & 0x1f; + Lsb = (opCode >> 7) & 0x1f; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32AluImm.cs b/src/ARMeilleure/Decoders/OpCode32AluImm.cs new file mode 100644 index 00000000..b5435aaf --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluImm.cs @@ -0,0 +1,23 @@ +using ARMeilleure.Common; + +namespace ARMeilleure.Decoders +{ + class OpCode32AluImm : OpCode32Alu, IOpCode32AluImm + { + public int Immediate { get; } + + public bool IsRotated { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluImm(inst, address, opCode); + + public OpCode32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int value = (opCode >> 0) & 0xff; + int shift = (opCode >> 8) & 0xf; + + Immediate = BitUtils.RotateRight(value, shift * 2, 32); + + IsRotated = shift != 0; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32AluImm16.cs b/src/ARMeilleure/Decoders/OpCode32AluImm16.cs new file mode 100644 index 00000000..e24edeb4 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluImm16.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluImm16 : OpCode32Alu, IOpCode32AluImm16 + { + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluImm16(inst, address, opCode); + + public OpCode32AluImm16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm12 = opCode & 0xfff; + int imm4 = (opCode >> 16) & 0xf; + + Immediate = (imm4 << 12) | imm12; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32AluMla.cs b/src/ARMeilleure/Decoders/OpCode32AluMla.cs new file mode 100644 index 00000000..2cd2b9dc --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluMla.cs @@ -0,0 +1,30 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluMla : OpCode32, IOpCode32AluMla + { + public int Rn { get; } + public int Rm { get; } + public int Ra { get; } + public int Rd { get; } + + public bool NHigh { get; } + public bool MHigh { get; } + public bool R { get; } + public bool? SetFlags { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluMla(inst, address, opCode); + + public OpCode32AluMla(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 0) & 0xf; + Rm = (opCode >> 8) & 0xf; + Ra = (opCode >> 12) & 0xf; + Rd = (opCode >> 16) & 0xf; + R = (opCode & (1 << 5)) != 0; + + NHigh = ((opCode >> 5) & 0x1) == 1; + MHigh = ((opCode >> 6) & 0x1) == 1; + SetFlags = ((opCode >> 20) & 1) != 0; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32AluReg.cs b/src/ARMeilleure/Decoders/OpCode32AluReg.cs new file mode 100644 index 00000000..493a977f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluReg.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluReg : OpCode32Alu, IOpCode32AluReg + { + public int Rm { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluReg(inst, address, opCode); + + public OpCode32AluReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs b/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs new file mode 100644 index 00000000..c2dee6c9 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluRsImm.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluRsImm : OpCode32Alu, IOpCode32AluRsImm + { + public int Rm { get; } + public int Immediate { get; } + + public ShiftType ShiftType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluRsImm(inst, address, opCode); + + public OpCode32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Immediate = (opCode >> 7) & 0x1f; + + ShiftType = (ShiftType)((opCode >> 5) & 3); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs b/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs new file mode 100644 index 00000000..04740d08 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluRsReg.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluRsReg : OpCode32Alu, IOpCode32AluRsReg + { + public int Rm { get; } + public int Rs { get; } + + public ShiftType ShiftType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluRsReg(inst, address, opCode); + + public OpCode32AluRsReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Rs = (opCode >> 8) & 0xf; + + ShiftType = (ShiftType)((opCode >> 5) & 3); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32AluUmull.cs b/src/ARMeilleure/Decoders/OpCode32AluUmull.cs new file mode 100644 index 00000000..bf80df3f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluUmull.cs @@ -0,0 +1,30 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32AluUmull : OpCode32, IOpCode32AluUmull + { + public int RdLo { get; } + public int RdHi { get; } + public int Rn { get; } + public int Rm { get; } + + public bool NHigh { get; } + public bool MHigh { get; } + + public bool? SetFlags { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluUmull(inst, address, opCode); + + public OpCode32AluUmull(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + RdLo = (opCode >> 12) & 0xf; + RdHi = (opCode >> 16) & 0xf; + Rm = (opCode >> 8) & 0xf; + Rn = (opCode >> 0) & 0xf; + + NHigh = ((opCode >> 5) & 0x1) == 1; + MHigh = ((opCode >> 6) & 0x1) == 1; + + SetFlags = ((opCode >> 20) & 0x1) != 0; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32AluUx.cs b/src/ARMeilleure/Decoders/OpCode32AluUx.cs new file mode 100644 index 00000000..57068675 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32AluUx.cs @@ -0,0 +1,18 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCode32AluUx : OpCode32AluReg, IOpCode32AluUx + { + public int Rotate { get; } + public int RotateBits => Rotate * 8; + public bool Add => Rn != RegisterAlias.Aarch32Pc; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32AluUx(inst, address, opCode); + + public OpCode32AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rotate = (opCode >> 10) & 0x3; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32BImm.cs b/src/ARMeilleure/Decoders/OpCode32BImm.cs new file mode 100644 index 00000000..f2959b33 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32BImm.cs @@ -0,0 +1,29 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32BImm : OpCode32, IOpCode32BImm + { + public long Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32BImm(inst, address, opCode); + + public OpCode32BImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + uint pc = GetPc(); + + // When the condition is never, the instruction is BLX to Thumb mode. + if (Cond != Condition.Nv) + { + pc &= ~3u; + } + + Immediate = pc + DecoderHelper.DecodeImm24_2(opCode); + + if (Cond == Condition.Nv) + { + long H = (opCode >> 23) & 2; + + Immediate |= H; + } + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32BReg.cs b/src/ARMeilleure/Decoders/OpCode32BReg.cs new file mode 100644 index 00000000..d4f5f760 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32BReg.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32BReg : OpCode32, IOpCode32BReg + { + public int Rm { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32BReg(inst, address, opCode); + + public OpCode32BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = opCode & 0xf; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32Exception.cs b/src/ARMeilleure/Decoders/OpCode32Exception.cs new file mode 100644 index 00000000..b4edcc10 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32Exception.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Exception : OpCode32, IOpCode32Exception + { + public int Id { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Exception(inst, address, opCode); + + public OpCode32Exception(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Id = opCode & 0xFFFFFF; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32Mem.cs b/src/ARMeilleure/Decoders/OpCode32Mem.cs new file mode 100644 index 00000000..ceb1e49f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32Mem.cs @@ -0,0 +1,39 @@ +using ARMeilleure.Instructions; + +namespace ARMeilleure.Decoders +{ + class OpCode32Mem : OpCode32, IOpCode32Mem + { + public int Rt { get; protected set; } + public int Rn { get; } + + public int Immediate { get; protected set; } + + public bool Index { get; } + public bool Add { get; } + public bool WBack { get; } + public bool Unprivileged { get; } + + public bool IsLoad { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Mem(inst, address, opCode); + + public OpCode32Mem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + bool isLoad = (opCode & (1 << 20)) != 0; + bool w = (opCode & (1 << 21)) != 0; + bool u = (opCode & (1 << 23)) != 0; + bool p = (opCode & (1 << 24)) != 0; + + Index = p; + Add = u; + WBack = !p || w; + Unprivileged = !p && w; + + IsLoad = isLoad || inst.Name == InstName.Ldrd; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32MemImm.cs b/src/ARMeilleure/Decoders/OpCode32MemImm.cs new file mode 100644 index 00000000..3af4b6f7 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32MemImm.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemImm : OpCode32Mem + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemImm(inst, address, opCode); + + public OpCode32MemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = opCode & 0xfff; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32MemImm8.cs b/src/ARMeilleure/Decoders/OpCode32MemImm8.cs new file mode 100644 index 00000000..1b8a57de --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32MemImm8.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemImm8 : OpCode32Mem + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemImm8(inst, address, opCode); + + public OpCode32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm4L = (opCode >> 0) & 0xf; + int imm4H = (opCode >> 8) & 0xf; + + Immediate = imm4L | (imm4H << 4); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs b/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs new file mode 100644 index 00000000..520113f4 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32MemLdEx.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemLdEx : OpCode32Mem, IOpCode32MemEx + { + public int Rd { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemLdEx(inst, address, opCode); + + public OpCode32MemLdEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = opCode & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32MemMult.cs b/src/ARMeilleure/Decoders/OpCode32MemMult.cs new file mode 100644 index 00000000..522b96bb --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32MemMult.cs @@ -0,0 +1,52 @@ +using System.Numerics; + +namespace ARMeilleure.Decoders +{ + class OpCode32MemMult : OpCode32, IOpCode32MemMult + { + public int Rn { get; } + + public int RegisterMask { get; } + public int Offset { get; } + public int PostOffset { get; } + + public bool IsLoad { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemMult(inst, address, opCode); + + public OpCode32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 16) & 0xf; + + bool isLoad = (opCode & (1 << 20)) != 0; + bool w = (opCode & (1 << 21)) != 0; + bool u = (opCode & (1 << 23)) != 0; + bool p = (opCode & (1 << 24)) != 0; + + RegisterMask = opCode & 0xffff; + + int regsSize = BitOperations.PopCount((uint)RegisterMask) * 4; + + if (!u) + { + Offset -= regsSize; + } + + if (u == p) + { + Offset += 4; + } + + if (w) + { + PostOffset = u ? regsSize : -regsSize; + } + else + { + PostOffset = 0; + } + + IsLoad = isLoad; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32MemReg.cs b/src/ARMeilleure/Decoders/OpCode32MemReg.cs new file mode 100644 index 00000000..786f37fa --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32MemReg.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemReg : OpCode32Mem, IOpCode32MemReg + { + public int Rm { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemReg(inst, address, opCode); + + public OpCode32MemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs b/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs new file mode 100644 index 00000000..e1284cf7 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32MemRsImm.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemRsImm : OpCode32Mem, IOpCode32MemRsImm + { + public int Rm { get; } + public ShiftType ShiftType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemRsImm(inst, address, opCode); + + public OpCode32MemRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Immediate = (opCode >> 7) & 0x1f; + + ShiftType = (ShiftType)((opCode >> 5) & 3); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32MemStEx.cs b/src/ARMeilleure/Decoders/OpCode32MemStEx.cs new file mode 100644 index 00000000..dcf93b22 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32MemStEx.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32MemStEx : OpCode32Mem, IOpCode32MemEx + { + public int Rd { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MemStEx(inst, address, opCode); + + public OpCode32MemStEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 12) & 0xf; + Rt = (opCode >> 0) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32Mrs.cs b/src/ARMeilleure/Decoders/OpCode32Mrs.cs new file mode 100644 index 00000000..c34a8b99 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32Mrs.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Mrs : OpCode32 + { + public bool R { get; } + public int Rd { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Mrs(inst, address, opCode); + + public OpCode32Mrs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + R = ((opCode >> 22) & 1) != 0; + Rd = (opCode >> 12) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32MsrReg.cs b/src/ARMeilleure/Decoders/OpCode32MsrReg.cs new file mode 100644 index 00000000..d897ffd8 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32MsrReg.cs @@ -0,0 +1,29 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCode32MsrReg : OpCode32 + { + public bool R { get; } + public int Mask { get; } + public int Rd { get; } + public bool Banked { get; } + public int Rn { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32MsrReg(inst, address, opCode); + + public OpCode32MsrReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + R = ((opCode >> 22) & 1) != 0; + Mask = (opCode >> 16) & 0xf; + Rd = (opCode >> 12) & 0xf; + Banked = ((opCode >> 9) & 1) != 0; + Rn = (opCode >> 
0) & 0xf; + + if (Rn == RegisterAlias.Aarch32Pc || Mask == 0) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32Sat.cs b/src/ARMeilleure/Decoders/OpCode32Sat.cs new file mode 100644 index 00000000..621def27 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32Sat.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Sat : OpCode32 + { + public int Rn { get; } + public int Imm5 { get; } + public int Rd { get; } + public int SatImm { get; } + + public ShiftType ShiftType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Sat(inst, address, opCode); + + public OpCode32Sat(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 0) & 0xf; + Imm5 = (opCode >> 7) & 0x1f; + Rd = (opCode >> 12) & 0xf; + SatImm = (opCode >> 16) & 0x1f; + + ShiftType = (ShiftType)((opCode >> 5) & 2); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32Sat16.cs b/src/ARMeilleure/Decoders/OpCode32Sat16.cs new file mode 100644 index 00000000..51061b07 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32Sat16.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32Sat16 : OpCode32 + { + public int Rn { get; } + public int Rd { get; } + public int SatImm { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Sat16(inst, address, opCode); + + public OpCode32Sat16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 0) & 0xf; + Rd = (opCode >> 12) & 0xf; + SatImm = (opCode >> 16) & 0xf; + } + } +}
\ No newline at end of file
diff --git a/src/ARMeilleure/Decoders/OpCode32Simd.cs b/src/ARMeilleure/Decoders/OpCode32Simd.cs
new file mode 100644
index 00000000..636aa0a8
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32Simd.cs
@@ -0,0 +1,33 @@
+namespace ARMeilleure.Decoders
+{
+    class OpCode32Simd : OpCode32SimdBase
+    {
+        public int Opc { get; protected set; }
+        public bool Q { get; protected set; }
+        public bool F { get; protected set; }
+        public bool U { get; }
+
+        public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32Simd(inst, address, opCode, false);
+        public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32Simd(inst, address, opCode, true);
+
+        public OpCode32Simd(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb)
+        {
+            Size = (opCode >> 20) & 0x3;
+            Q = ((opCode >> 6) & 0x1) != 0;
+            F = ((opCode >> 10) & 0x1) != 0;
+            U = ((opCode >> (isThumb ? 28 : 24)) & 0x1) != 0;
+            Opc = (opCode >> 7) & 0x3;
+
+            RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64;
+
+            Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf);
+            Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf);
+
+            // Subclasses have their own handling of Vd/Vm to account for, so only
+            // run this check when the type is exactly OpCode32Simd.
+            if (GetType() == typeof(OpCode32Simd) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm))
+            {
+                Instruction = InstDescriptor.Undefined;
+            }
+        }
+    }
+}
diff --git a/src/ARMeilleure/Decoders/OpCode32SimdBase.cs b/src/ARMeilleure/Decoders/OpCode32SimdBase.cs
new file mode 100644
index 00000000..4382fc2a
--- /dev/null
+++ b/src/ARMeilleure/Decoders/OpCode32SimdBase.cs
@@ -0,0 +1,55 @@
+using System;
+
+namespace ARMeilleure.Decoders
+{
+    abstract class OpCode32SimdBase : OpCode32, IOpCode32Simd
+    {
+        public int Vd { get; protected set; }
+        public int Vm { get; protected set; }
+        public int Size { get; protected set; }
+
+        // Helpers to index doublewords within quadwords. Essentially, looping over the
+        // vector starts at quadword Qx, at index Fx or Ix within it, depending on the
+        // instruction type.
+        //
+        // Qx: The quadword register that the target vector is contained in.
+        // Ix: The starting index of the target vector within the quadword, with size treated as integer.
+        // Fx: The starting index of the target vector within the quadword, with size treated as floating point. (16 or 32)
+        public int Qd => GetQuadwordIndex(Vd);
+        public int Id => GetQuadwordSubindex(Vd) << (3 - Size);
+        public int Fd => GetQuadwordSubindex(Vd) << (1 - (Size & 1)); // When the top bit is truncated, 1 means fp16, an optional extension in ARMv8.2, so we always assume fp64 here.
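+        // For example, with Size == 1 (16-bit elements), D3 is the upper half of Q1,
+        // so Qd == 1 and Id == 4: the vector starts at element 4 of the quadword.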
+ + public int Qm => GetQuadwordIndex(Vm); + public int Im => GetQuadwordSubindex(Vm) << (3 - Size); + public int Fm => GetQuadwordSubindex(Vm) << (1 - (Size & 1)); + + protected int GetQuadwordIndex(int index) + { + switch (RegisterSize) + { + case RegisterSize.Simd128: + case RegisterSize.Simd64: + return index >> 1; + } + + throw new InvalidOperationException(); + } + + protected int GetQuadwordSubindex(int index) + { + switch (RegisterSize) + { + case RegisterSize.Simd128: + return 0; + case RegisterSize.Simd64: + return index & 1; + } + + throw new InvalidOperationException(); + } + + protected OpCode32SimdBase(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs b/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs new file mode 100644 index 00000000..ba190de9 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdBinary.cs @@ -0,0 +1,21 @@ +namespace ARMeilleure.Decoders +{ + /// <summary> + /// A special alias that always runs in 64 bit int, to speed up binary ops a little. + /// </summary> + class OpCode32SimdBinary : OpCode32SimdReg + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdBinary(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdBinary(inst, address, opCode, true); + + public OpCode32SimdBinary(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Size = 3; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs b/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs new file mode 100644 index 00000000..445e6781 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdCmpZ.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCmpZ : OpCode32Simd + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCmpZ(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCmpZ(inst, address, opCode, true); + + public OpCode32SimdCmpZ(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Size = (opCode >> 18) & 0x3; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs b/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs new file mode 100644 index 00000000..41cf4d88 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdCvtFI.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCvtFI : OpCode32SimdS + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFI(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtFI(inst, address, opCode, true); + + public OpCode32SimdCvtFI(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Opc = (opCode >> 7) & 0x1; + + bool toInteger = (Opc2 & 0b100) != 0; + + if (toInteger) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) 
& 0x1e); + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs b/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs new file mode 100644 index 00000000..a95b32ab --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdCvtTB.cs @@ -0,0 +1,44 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdCvtTB : OpCode32, IOpCode32Simd + { + public int Vd { get; } + public int Vm { get; } + public bool Op { get; } // Convert to Half / Convert from Half + public bool T { get; } // Top / Bottom + public int Size { get; } // Double / Single + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtTB(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdCvtTB(inst, address, opCode, true); + + public OpCode32SimdCvtTB(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Op = ((opCode >> 16) & 0x1) != 0; + T = ((opCode >> 7) & 0x1) != 0; + Size = ((opCode >> 8) & 0x1); + + RegisterSize = Size == 1 ? RegisterSize.Int64 : RegisterSize.Int32; + + if (Size == 1) + { + if (Op) + { + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + } + else + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs new file mode 100644 index 00000000..c455b5b4 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdDupElem.cs @@ -0,0 +1,43 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdDupElem : OpCode32Simd + { + public int Index { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupElem(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupElem(inst, address, opCode, true); + + public OpCode32SimdDupElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + var opc = (opCode >> 16) & 0xf; + + if ((opc & 0b1) == 1) + { + Size = 0; + Index = (opc >> 1) & 0x7; + } + else if ((opc & 0b11) == 0b10) + { + Size = 1; + Index = (opc >> 2) & 0x3; + } + else if ((opc & 0b111) == 0b100) + { + Size = 2; + Index = (opc >> 3) & 0x1; + } + else + { + Instruction = InstDescriptor.Undefined; + } + + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs b/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs new file mode 100644 index 00000000..31546ea3 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdDupGP.cs @@ -0,0 +1,36 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdDupGP : OpCode32, IOpCode32Simd + { + public int Size { get; } + public int Vd { get; } + public int Rt { get; } + public bool Q { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupGP(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdDupGP(inst, address, opCode, true); + + public OpCode32SimdDupGP(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Size = 2 - (((opCode >> 21) & 0x2) | ((opCode >> 5) & 0x1)); // B:E - 0 for 32, 16 then 8. + if (Size == -1) + { + Instruction = InstDescriptor.Undefined; + return; + } + Q = ((opCode >> 21) & 0x1) != 0; + + RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; + + Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + Rt = ((opCode >> 12) & 0xf); + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdExt.cs b/src/ARMeilleure/Decoders/OpCode32SimdExt.cs new file mode 100644 index 00000000..6dbb5b66 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdExt.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdExt : OpCode32SimdReg + { + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdExt(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdExt(inst, address, opCode, true); + + public OpCode32SimdExt(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Immediate = (opCode >> 8) & 0xf; + Size = 0; + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn) || (!Q && Immediate > 7)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdImm.cs new file mode 100644 index 00000000..bf0ca527 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdImm.cs @@ -0,0 +1,38 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdImm : OpCode32SimdBase, IOpCode32SimdImm + { + public bool Q { get; } + public long Immediate { get; } + public int Elems => GetBytesCount() >> Size; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm(inst, address, opCode, true); + + public OpCode32SimdImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + Q = ((opCode >> 6) & 0x1) > 0; + + int cMode = (opCode >> 8) & 0xf; + int op = (opCode >> 5) & 0x1; + + long imm; + + imm = ((uint)opCode >> 0) & 0xf; + imm |= ((uint)opCode >> 12) & 0x70; + imm |= ((uint)opCode >> (isThumb ? 21 : 17)) & 0x80; + + (Immediate, Size) = OpCodeSimdHelper.GetSimdImmediateAndSize(cMode, op, imm); + + RegisterSize = Q ? 
RegisterSize.Simd128 : RegisterSize.Simd64; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs b/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs new file mode 100644 index 00000000..fa00a935 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdImm44.cs @@ -0,0 +1,41 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdImm44 : OpCode32, IOpCode32SimdImm + { + public int Vd { get; } + public long Immediate { get; } + public int Size { get; } + public int Elems { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm44(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdImm44(inst, address, opCode, true); + + public OpCode32SimdImm44(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Size = (opCode >> 8) & 0x3; + + bool single = Size != 3; + + if (single) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + + long imm; + + imm = ((uint)opCode >> 0) & 0xf; + imm |= ((uint)opCode >> 12) & 0xf0; + + Immediate = (Size == 3) ? (long)DecoderHelper.Imm8ToFP64Table[(int)imm] : DecoderHelper.Imm8ToFP32Table[(int)imm]; + + RegisterSize = (!single) ? RegisterSize.Int64 : RegisterSize.Int32; + Elems = 1; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdLong.cs new file mode 100644 index 00000000..8d64d673 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdLong.cs @@ -0,0 +1,30 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdLong : OpCode32SimdBase + { + public bool U { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdLong(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdLong(inst, address, opCode, true); + + public OpCode32SimdLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + int imm3h = (opCode >> 19) & 0x7; + + // The value must be a power of 2, otherwise it is the encoding of another instruction. + switch (imm3h) + { + case 1: Size = 0; break; + case 2: Size = 1; break; + case 4: Size = 2; break; + } + + U = ((opCode >> (isThumb ? 
28 : 24)) & 0x1) != 0; + + RegisterSize = RegisterSize.Simd64; + + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs new file mode 100644 index 00000000..c933a5ad --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdMemImm.cs @@ -0,0 +1,40 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemImm : OpCode32, IOpCode32Simd + { + public int Vd { get; } + public int Rn { get; } + public int Size { get; } + public bool Add { get; } + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemImm(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemImm(inst, address, opCode, true); + + public OpCode32SimdMemImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Immediate = opCode & 0xff; + + Rn = (opCode >> 16) & 0xf; + Size = (opCode >> 8) & 0x3; + + Immediate <<= (Size == 1) ? 1 : 2; + + bool u = (opCode & (1 << 23)) != 0; + Add = u; + + bool single = Size != 3; + + if (single) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs new file mode 100644 index 00000000..a16a03d3 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdMemMult.cs @@ -0,0 +1,76 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemMult : OpCode32 + { + public int Rn { get; } + public int Vd { get; } + + public int RegisterRange { get; } + public int Offset { get; } + public int PostOffset { get; } + public bool IsLoad { get; } + public bool DoubleWidth { get; } + public bool Add { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemMult(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemMult(inst, address, opCode, true); + + public OpCode32SimdMemMult(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Rn = (opCode >> 16) & 0xf; + + bool isLoad = (opCode & (1 << 20)) != 0; + bool w = (opCode & (1 << 21)) != 0; + bool u = (opCode & (1 << 23)) != 0; + bool p = (opCode & (1 << 24)) != 0; + + if (p == u && w) + { + Instruction = InstDescriptor.Undefined; + return; + } + + DoubleWidth = (opCode & (1 << 8)) != 0; + + if (!DoubleWidth) + { + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + + Add = u; + + RegisterRange = opCode & 0xff; + + int regsSize = RegisterRange * 4; // Double mode is still measured in single register size. + + if (!u) + { + Offset -= regsSize; + } + + if (w) + { + PostOffset = u ? regsSize : -regsSize; + } + else + { + PostOffset = 0; + } + + IsLoad = isLoad; + + int regs = DoubleWidth ? 
RegisterRange / 2 : RegisterRange; + + if (RegisterRange == 0 || RegisterRange > 32 || Vd + regs > 32) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs new file mode 100644 index 00000000..da88eed2 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdMemPair.cs @@ -0,0 +1,50 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemPair : OpCode32, IOpCode32Simd + { + private static int[] _regsMap = + { + 1, 1, 4, 2, + 1, 1, 3, 1, + 1, 1, 2, 1, + 1, 1, 1, 1 + }; + + public int Vd { get; } + public int Rn { get; } + public int Rm { get; } + public int Align { get; } + public bool WBack { get; } + public bool RegisterIndex { get; } + public int Size { get; } + public int Elems => 8 >> Size; + public int Regs { get; } + public int Increment { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemPair(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemPair(inst, address, opCode, true); + + public OpCode32SimdMemPair(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + Size = (opCode >> 6) & 0x3; + + Align = (opCode >> 4) & 0x3; + Rm = (opCode >> 0) & 0xf; + Rn = (opCode >> 16) & 0xf; + + WBack = Rm != RegisterAlias.Aarch32Pc; + RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp; + + Regs = _regsMap[(opCode >> 8) & 0xf]; + + Increment = ((opCode >> 8) & 0x1) + 1; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs b/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs new file mode 100644 index 00000000..35dd41c2 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdMemSingle.cs @@ -0,0 +1,51 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMemSingle : OpCode32, IOpCode32Simd + { + public int Vd { get; } + public int Rn { get; } + public int Rm { get; } + public int IndexAlign { get; } + public int Index { get; } + public bool WBack { get; } + public bool RegisterIndex { get; } + public int Size { get; } + public bool Replicate { get; } + public int Increment { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemSingle(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMemSingle(inst, address, opCode, true); + + public OpCode32SimdMemSingle(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Vd = (opCode >> 12) & 0xf; + Vd |= (opCode >> 18) & 0x10; + + IndexAlign = (opCode >> 4) & 0xf; + + Size = (opCode >> 10) & 0x3; + Replicate = Size == 3; + if (Replicate) + { + Size = (opCode >> 6) & 0x3; + Increment = ((opCode >> 5) & 1) + 1; + Index = 0; + } + else + { + Increment = (((IndexAlign >> Size) & 1) == 0) ? 
1 : 2; + Index = IndexAlign >> (1 + Size); + } + + Rm = (opCode >> 0) & 0xf; + Rn = (opCode >> 16) & 0xf; + + WBack = Rm != RegisterAlias.Aarch32Pc; + RegisterIndex = Rm != RegisterAlias.Aarch32Pc && Rm != RegisterAlias.Aarch32Sp; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs new file mode 100644 index 00000000..5afd3488 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGp.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGp : OpCode32, IOpCode32Simd + { + public int Size => 2; + + public int Vn { get; } + public int Rt { get; } + public int Op { get; } + + public int Opc1 { get; } + public int Opc2 { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGp(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGp(inst, address, opCode, true); + + public OpCode32SimdMovGp(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + // Which one is used is instruction dependent. + Op = (opCode >> 20) & 0x1; + + Opc1 = (opCode >> 21) & 0x3; + Opc2 = (opCode >> 5) & 0x3; + + Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e); + Rt = (opCode >> 12) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs new file mode 100644 index 00000000..2d693119 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGpDouble.cs @@ -0,0 +1,36 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGpDouble : OpCode32, IOpCode32Simd + { + public int Size => 3; + + public int Vm { get; } + public int Rt { get; } + public int Rt2 { get; } + public int Op { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpDouble(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpDouble(inst, address, opCode, true); + + public OpCode32SimdMovGpDouble(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + // Which one is used is instruction dependent.
+ Op = (opCode >> 20) & 0x1; + + Rt = (opCode >> 12) & 0xf; + Rt2 = (opCode >> 16) & 0xf; + + bool single = (opCode & (1 << 8)) == 0; + if (single) + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + } + else + { + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs new file mode 100644 index 00000000..7816665f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdMovGpElem.cs @@ -0,0 +1,51 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovGpElem : OpCode32, IOpCode32Simd + { + public int Size { get; } + + public int Vd { get; } + public int Rt { get; } + public int Op { get; } + public bool U { get; } + + public int Index { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpElem(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovGpElem(inst, address, opCode, true); + + public OpCode32SimdMovGpElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Op = (opCode >> 20) & 0x1; + U = ((opCode >> 23) & 1) != 0; + + var opc = (((opCode >> 23) & 1) << 4) | (((opCode >> 21) & 0x3) << 2) | ((opCode >> 5) & 0x3); + + if ((opc & 0b01000) == 0b01000) + { + Size = 0; + Index = opc & 0x7; + } + else if ((opc & 0b01001) == 0b00001) + { + Size = 1; + Index = (opc >> 1) & 0x3; + } + else if ((opc & 0b11011) == 0) + { + Size = 2; + Index = (opc >> 2) & 0x1; + } + else + { + Instruction = InstDescriptor.Undefined; + return; + } + + Vd = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + Rt = (opCode >> 12) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs b/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs new file mode 100644 index 00000000..576e12cc --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdMovn.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdMovn : OpCode32Simd + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovn(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdMovn(inst, address, opCode, true); + + public OpCode32SimdMovn(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Size = (opCode >> 18) & 0x3; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdReg.cs b/src/ARMeilleure/Decoders/OpCode32SimdReg.cs new file mode 100644 index 00000000..1c46b0e0 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdReg.cs @@ -0,0 +1,25 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdReg : OpCode32Simd + { + public int Vn { get; } + + public int Qn => GetQuadwordIndex(Vn); + public int In => GetQuadwordSubindex(Vn) << (3 - Size); + public int Fn => GetQuadwordSubindex(Vn) << (1 - (Size & 1)); + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdReg(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdReg(inst, address, opCode, true); + + public OpCode32SimdReg(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + + // Subclasses have their 
own handling of Vx, which must be accounted for before this check. + if (GetType() == typeof(OpCode32SimdReg) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm, Vn)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs new file mode 100644 index 00000000..173c5265 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdRegElem.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegElem : OpCode32SimdReg + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElem(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElem(inst, address, opCode, true); + + public OpCode32SimdRegElem(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Q = ((opCode >> (isThumb ? 28 : 24)) & 0x1) != 0; + F = ((opCode >> 8) & 0x1) != 0; + Size = (opCode >> 20) & 0x3; + + RegisterSize = Q ? RegisterSize.Simd128 : RegisterSize.Simd64; + + if (Size == 1) + { + Vm = ((opCode >> 3) & 0x1) | ((opCode >> 4) & 0x2) | ((opCode << 2) & 0x1c); + } + else /* if (Size == 2) */ + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + } + + if (GetType() == typeof(OpCode32SimdRegElem) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vn) || Size == 0 || (Size == 1 && F)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs new file mode 100644 index 00000000..b87ac413 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdRegElemLong.cs @@ -0,0 +1,22 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegElemLong : OpCode32SimdRegElem + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElemLong(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegElemLong(inst, address, opCode, true); + + public OpCode32SimdRegElemLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Q = false; + F = false; + + RegisterSize = RegisterSize.Simd64; + + // (Vd & 1) != 0 || Size == 3 are also invalid, but those cases are checked by the encoding itself. + if (Size == 0) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs new file mode 100644 index 00000000..11069383 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdRegLong.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegLong : OpCode32SimdReg + { + public bool Polynomial { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegLong(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegLong(inst, address, opCode, true); + + public OpCode32SimdRegLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Q = false; + RegisterSize = RegisterSize.Simd64; + + Polynomial = ((opCode >> 9) & 0x1) != 0; + + // Subclasses have their own handling of Vx, which must be accounted for before this check.
+ if (GetType() == typeof(OpCode32SimdRegLong) && DecoderHelper.VectorArgumentsInvalid(true, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs new file mode 100644 index 00000000..8168e83f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdRegS.cs @@ -0,0 +1,23 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegS : OpCode32SimdS + { + public int Vn { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegS(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegS(inst, address, opCode, true); + + public OpCode32SimdRegS(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + bool single = Size != 3; + if (single) + { + Vn = ((opCode >> 7) & 0x1) | ((opCode >> 15) & 0x1e); + } + else + { + Vn = ((opCode >> 3) & 0x10) | ((opCode >> 16) & 0xf); + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs b/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs new file mode 100644 index 00000000..fd2b3bf1 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdRegWide.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRegWide : OpCode32SimdReg + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegWide(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRegWide(inst, address, opCode, true); + + public OpCode32SimdRegWide(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Q = false; + RegisterSize = RegisterSize.Simd64; + + // Subclasses have their own handling of Vx, which must be accounted for before this check. + if (GetType() == typeof(OpCode32SimdRegWide) && DecoderHelper.VectorArgumentsInvalid(true, Vd, Vn)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdRev.cs b/src/ARMeilleure/Decoders/OpCode32SimdRev.cs new file mode 100644 index 00000000..cb64765f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdRev.cs @@ -0,0 +1,23 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdRev : OpCode32SimdCmpZ + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRev(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdRev(inst, address, opCode, true); + + public OpCode32SimdRev(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + if (Opc + Size >= 3) + { + Instruction = InstDescriptor.Undefined; + return; + } + + // Currently, this instruction is treated as though its opcode encodes the true size, + // which lets us reverse vectors on a single-element basis (e.g. doing the math on one I64 rather than inserting lots of I8s). + int tempSize = Size; + Size = 3 - Opc; // Opc 0 is 64 bit, 1 is 32, and so on.
+ Opc = tempSize; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdS.cs b/src/ARMeilleure/Decoders/OpCode32SimdS.cs new file mode 100644 index 00000000..63c03c01 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdS.cs @@ -0,0 +1,39 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdS : OpCode32, IOpCode32Simd + { + public int Vd { get; protected set; } + public int Vm { get; protected set; } + public int Opc { get; protected set; } // "with_zero" (Opc<1>) [Vcmp, Vcmpe]. + public int Opc2 { get; } // opc2 or RM (opc2<1:0>) [Vcvt, Vrint]. + public int Size { get; protected set; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdS(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdS(inst, address, opCode, true); + + public OpCode32SimdS(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Opc = (opCode >> 15) & 0x3; + Opc2 = (opCode >> 16) & 0x7; + + Size = (opCode >> 8) & 0x3; + + bool single = Size != 3; + + RegisterSize = single ? RegisterSize.Int32 : RegisterSize.Int64; + + if (single) + { + Vm = ((opCode >> 5) & 0x1) | ((opCode << 1) & 0x1e); + Vd = ((opCode >> 22) & 0x1) | ((opCode >> 11) & 0x1e); + } + else + { + Vm = ((opCode >> 1) & 0x10) | ((opCode >> 0) & 0xf); + Vd = ((opCode >> 18) & 0x10) | ((opCode >> 12) & 0xf); + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSel.cs b/src/ARMeilleure/Decoders/OpCode32SimdSel.cs new file mode 100644 index 00000000..37fd714a --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdSel.cs @@ -0,0 +1,23 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSel : OpCode32SimdRegS + { + public OpCode32SimdSelMode Cc { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSel(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSel(inst, address, opCode, true); + + public OpCode32SimdSel(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Cc = (OpCode32SimdSelMode)((opCode >> 20) & 3); + } + } + + enum OpCode32SimdSelMode : int + { + Eq = 0, + Vs, + Ge, + Gt + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs new file mode 100644 index 00000000..55ddc395 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdShImm.cs @@ -0,0 +1,46 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdShImm : OpCode32Simd + { + public int Shift { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImm(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImm(inst, address, opCode, true); + + public OpCode32SimdShImm(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + int imm6 = (opCode >> 16) & 0x3f; + int limm6 = ((opCode >> 1) & 0x40) | imm6; + + if ((limm6 & 0x40) == 0b1000000) + { + Size = 3; + Shift = imm6; + } + else if ((limm6 & 0x60) == 0b0100000) + { + Size = 2; + Shift = imm6 - 32; + } + else if ((limm6 & 0x70) == 0b0010000) + { + Size = 1; + Shift = imm6 - 16; + } + else if ((limm6 & 0x78) == 0b0001000) + { + Size = 0; + Shift = imm6 - 8; + } + else + { + 
Instruction = InstDescriptor.Undefined; + } + + if (GetType() == typeof(OpCode32SimdShImm) && DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs new file mode 100644 index 00000000..6b1b0ad1 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdShImmLong.cs @@ -0,0 +1,43 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdShImmLong : OpCode32Simd + { + public int Shift { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmLong(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmLong(inst, address, opCode, true); + + public OpCode32SimdShImmLong(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Q = false; + RegisterSize = RegisterSize.Simd64; + + int imm6 = (opCode >> 16) & 0x3f; + + if ((imm6 & 0x20) == 0b100000) + { + Size = 2; + Shift = imm6 - 32; + } + else if ((imm6 & 0x30) == 0b010000) + { + Size = 1; + Shift = imm6 - 16; + } + else if ((imm6 & 0x38) == 0b001000) + { + Size = 0; + Shift = imm6 - 8; + } + else + { + Instruction = InstDescriptor.Undefined; + } + + if (GetType() == typeof(OpCode32SimdShImmLong) && DecoderHelper.VectorArgumentsInvalid(true, Vd)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs b/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs new file mode 100644 index 00000000..5351e65f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdShImmNarrow.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdShImmNarrow : OpCode32SimdShImm + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmNarrow(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdShImmNarrow(inst, address, opCode, true); + + public OpCode32SimdShImmNarrow(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) { } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs b/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs new file mode 100644 index 00000000..61a9f387 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdSpecial.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSpecial : OpCode32 + { + public int Rt { get; } + public int Sreg { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSpecial(inst, address, opCode, false); + public static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSpecial(inst, address, opCode, true); + + public OpCode32SimdSpecial(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode) + { + IsThumb = isThumb; + + Rt = (opCode >> 12) & 0xf; + Sreg = (opCode >> 16) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs b/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs new file mode 100644 index 00000000..5b715535 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdSqrte.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdSqrte : OpCode32Simd + { + public new static OpCode Create(InstDescriptor 
inst, ulong address, int opCode) => new OpCode32SimdSqrte(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdSqrte(inst, address, opCode, true); + + public OpCode32SimdSqrte(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Size = (opCode >> 18) & 0x1; + F = ((opCode >> 8) & 0x1) != 0; + + if (DecoderHelper.VectorArgumentsInvalid(Q, Vd, Vm)) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs b/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs new file mode 100644 index 00000000..c4fb4b9c --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32SimdTbl.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32SimdTbl : OpCode32SimdReg + { + public int Length { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdTbl(inst, address, opCode, false); + public new static OpCode CreateT32(InstDescriptor inst, ulong address, int opCode) => new OpCode32SimdTbl(inst, address, opCode, true); + + public OpCode32SimdTbl(InstDescriptor inst, ulong address, int opCode, bool isThumb) : base(inst, address, opCode, isThumb) + { + Length = (opCode >> 8) & 3; + Size = 0; + Opc = Q ? 1 : 0; + Q = false; + RegisterSize = RegisterSize.Simd64; + + if (Vn + Length + 1 > 32) + { + Instruction = InstDescriptor.Undefined; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCode32System.cs b/src/ARMeilleure/Decoders/OpCode32System.cs new file mode 100644 index 00000000..89e93349 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCode32System.cs @@ -0,0 +1,28 @@ +namespace ARMeilleure.Decoders +{ + class OpCode32System : OpCode32 + { + public int Opc1 { get; } + public int CRn { get; } + public int Rt { get; } + public int Opc2 { get; } + public int CRm { get; } + public int MrrcOp { get; } + + public int Coproc { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCode32System(inst, address, opCode); + + public OpCode32System(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Opc1 = (opCode >> 21) & 0x7; + CRn = (opCode >> 16) & 0xf; + Rt = (opCode >> 12) & 0xf; + Opc2 = (opCode >> 5) & 0x7; + CRm = (opCode >> 0) & 0xf; + MrrcOp = (opCode >> 4) & 0xf; + + Coproc = (opCode >> 8) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeAdr.cs b/src/ARMeilleure/Decoders/OpCodeAdr.cs new file mode 100644 index 00000000..9655c766 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeAdr.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAdr : OpCode + { + public int Rd { get; } + + public long Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAdr(inst, address, opCode); + + public OpCodeAdr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = opCode & 0x1f; + + Immediate = DecoderHelper.DecodeImmS19_2(opCode); + Immediate |= ((long)opCode >> 29) & 3; + } + } +}
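The ADR immediate is split across the encoding: DecoderHelper.DecodeImmS19_2 (defined outside this hunk) supplies the high bits and bits [30:29] supply the low two. A minimal standalone sketch of the combined decode, assuming DecodeImmS19_2 sign-extends bits [23:5] and scales them by 4; DecodeAdrImmediate is a hypothetical name:

    static long DecodeAdrImmediate(int opCode)
    {
        long immHi = ((long)(opCode << 8) >> 13) << 2; // immhi: bits [23:5], sign-extended, times 4
        long immLo = ((long)opCode >> 29) & 3;         // immlo: bits [30:29]

        return immHi | immLo;
    }

    // Example: ADR X0, #8 encodes immlo = 0 and immhi = 2, and decodes back to 8.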
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeAlu.cs b/src/ARMeilleure/Decoders/OpCodeAlu.cs new file mode 100644 index 00000000..4d7f03a7 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeAlu.cs @@ -0,0 +1,23 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAlu : OpCode, IOpCodeAlu + { + public int Rd { get; protected set; } + public int Rn { get; } + + public DataOp DataOp { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAlu(inst, address, opCode); + + public OpCodeAlu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x1f; + Rn = (opCode >> 5) & 0x1f; + DataOp = (DataOp)((opCode >> 24) & 0x3); + + RegisterSize = (opCode >> 31) != 0 + ? RegisterSize.Int64 + : RegisterSize.Int32; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeAluBinary.cs b/src/ARMeilleure/Decoders/OpCodeAluBinary.cs new file mode 100644 index 00000000..e8b10656 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeAluBinary.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAluBinary : OpCodeAlu + { + public int Rm { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluBinary(inst, address, opCode); + + public OpCodeAluBinary(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 16) & 0x1f; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeAluImm.cs b/src/ARMeilleure/Decoders/OpCodeAluImm.cs new file mode 100644 index 00000000..91aa9553 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeAluImm.cs @@ -0,0 +1,40 @@ +using System; + +namespace ARMeilleure.Decoders +{ + class OpCodeAluImm : OpCodeAlu, IOpCodeAluImm + { + public long Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluImm(inst, address, opCode); + + public OpCodeAluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + if (DataOp == DataOp.Arithmetic) + { + Immediate = (opCode >> 10) & 0xfff; + + int shift = (opCode >> 22) & 3; + + Immediate <<= shift * 12; + } + else if (DataOp == DataOp.Logical) + { + var bm = DecoderHelper.DecodeBitMask(opCode, true); + + if (bm.IsUndefined) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Immediate = bm.WMask; + } + else + { + throw new ArgumentException(nameof(opCode)); + } + } + } +}
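For the arithmetic form above, the shift field multiplies the 12-bit immediate by 1 or 4096 (values 2 and 3 are reserved in the architecture; like the class above, this sketch does not reject them). DecodeArithImmediate is a hypothetical standalone equivalent:

    static long DecodeArithImmediate(int opCode)
    {
        long imm12 = (opCode >> 10) & 0xfff; // 12-bit unsigned immediate
        int shift = (opCode >> 22) & 0x3;    // 0 = LSL #0, 1 = LSL #12

        return imm12 << (shift * 12);
    }

    // Example: ADD X0, X1, #1, LSL #12 decodes to an immediate of 4096.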
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeAluRs.cs b/src/ARMeilleure/Decoders/OpCodeAluRs.cs new file mode 100644 index 00000000..94983336 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeAluRs.cs @@ -0,0 +1,29 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAluRs : OpCodeAlu, IOpCodeAluRs + { + public int Shift { get; } + public int Rm { get; } + + public ShiftType ShiftType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluRs(inst, address, opCode); + + public OpCodeAluRs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int shift = (opCode >> 10) & 0x3f; + + if (shift >= GetBitsCount()) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Shift = shift; + + Rm = (opCode >> 16) & 0x1f; + ShiftType = (ShiftType)((opCode >> 22) & 0x3); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeAluRx.cs b/src/ARMeilleure/Decoders/OpCodeAluRx.cs new file mode 100644 index 00000000..d39da9e7 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeAluRx.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeAluRx : OpCodeAlu, IOpCodeAluRx + { + public int Shift { get; } + public int Rm { get; } + + public IntType IntType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeAluRx(inst, address, opCode); + + public OpCodeAluRx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Shift = (opCode >> 10) & 0x7; + IntType = (IntType)((opCode >> 13) & 0x7); + Rm = (opCode >> 16) & 0x1f; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeBImm.cs b/src/ARMeilleure/Decoders/OpCodeBImm.cs new file mode 100644 index 00000000..e302516e --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeBImm.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImm : OpCode, IOpCodeBImm + { + public long Immediate { get; protected set; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImm(inst, address, opCode); + + public OpCodeBImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeBImmAl.cs b/src/ARMeilleure/Decoders/OpCodeBImmAl.cs new file mode 100644 index 00000000..47ae5f56 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeBImmAl.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImmAl : OpCodeBImm + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmAl(inst, address, opCode); + + public OpCodeBImmAl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (long)address + DecoderHelper.DecodeImm26_2(opCode); + } + } +}
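DecoderHelper.DecodeImm26_2 is defined outside this hunk; assuming it follows the standard B/BL rule (sign-extend bits [25:0], scale by 4), the absolute target computed above can be reproduced with a hypothetical helper:

    static ulong DecodeBranchTarget(ulong address, int opCode)
    {
        long imm26 = ((long)(opCode << 6) >> 6) << 2; // bits [25:0], sign-extended, times 4

        return address + (ulong)imm26;
    }

    // Example: at address 0x1000, an offset field of 4 gives a target of 0x1010.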
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs b/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs new file mode 100644 index 00000000..a5246569 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeBImmCmp.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImmCmp : OpCodeBImm + { + public int Rt { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmCmp(inst, address, opCode); + + public OpCodeBImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = opCode & 0x1f; + + Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode); + + RegisterSize = (opCode >> 31) != 0 + ? RegisterSize.Int64 + : RegisterSize.Int32; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeBImmCond.cs b/src/ARMeilleure/Decoders/OpCodeBImmCond.cs new file mode 100644 index 00000000..b57a7ea8 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeBImmCond.cs @@ -0,0 +1,25 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImmCond : OpCodeBImm, IOpCodeCond + { + public Condition Cond { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmCond(inst, address, opCode); + + public OpCodeBImmCond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int o0 = (opCode >> 4) & 1; + + if (o0 != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Cond = (Condition)(opCode & 0xf); + + Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeBImmTest.cs b/src/ARMeilleure/Decoders/OpCodeBImmTest.cs new file mode 100644 index 00000000..bad98405 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeBImmTest.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBImmTest : OpCodeBImm + { + public int Rt { get; } + public int Bit { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBImmTest(inst, address, opCode); + + public OpCodeBImmTest(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = opCode & 0x1f; + + Immediate = (long)address + DecoderHelper.DecodeImmS14_2(opCode); + + Bit = (opCode >> 19) & 0x1f; + Bit |= (opCode >> 26) & 0x20; + } + } +}
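The tested bit number above is split across the encoding: b40 sits in bits [23:19] and b5 in bit 31 (which also selects the 64-bit variant). The composition collapses to a one-liner; DecodeTestBit is a hypothetical name:

    static int DecodeTestBit(int opCode) => ((opCode >> 19) & 0x1f) | ((opCode >> 26) & 0x20);

    // Example: TBZ X3, #33 sets b5 = 1 and b40 = 1, so the decoded bit number is 33.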
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeBReg.cs b/src/ARMeilleure/Decoders/OpCodeBReg.cs new file mode 100644 index 00000000..b5dcbfd8 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeBReg.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBReg : OpCode + { + public int Rn { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBReg(inst, address, opCode); + + public OpCodeBReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int op4 = (opCode >> 0) & 0x1f; + int op2 = (opCode >> 16) & 0x1f; + + if (op2 != 0b11111 || op4 != 0b00000) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Rn = (opCode >> 5) & 0x1f; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeBfm.cs b/src/ARMeilleure/Decoders/OpCodeBfm.cs new file mode 100644 index 00000000..8e1c7836 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeBfm.cs @@ -0,0 +1,29 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeBfm : OpCodeAlu + { + public long WMask { get; } + public long TMask { get; } + public int Pos { get; } + public int Shift { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeBfm(inst, address, opCode); + + public OpCodeBfm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + var bm = DecoderHelper.DecodeBitMask(opCode, false); + + if (bm.IsUndefined) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + WMask = bm.WMask; + TMask = bm.TMask; + Pos = bm.Pos; + Shift = bm.Shift; + } + } +}
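DecoderHelper.DecodeBitMask itself is outside this hunk. As a rough sketch of how WMask is presumably derived, here is the wmask half of the architectural DecodeBitMasks pseudocode (TMask, Pos, Shift and the 32-bit register-size checks are omitted); TryDecodeWMask is a hypothetical name:

    using System.Numerics;

    static bool TryDecodeWMask(int opCode, out ulong wmask)
    {
        wmask = 0;

        int immN = (opCode >> 22) & 0x1;
        int immR = (opCode >> 16) & 0x3f;
        int immS = (opCode >> 10) & 0x3f;

        // The element size comes from the highest set bit of immN:NOT(immS).
        int combined = (immN << 6) | (~immS & 0x3f);

        if (combined == 0)
        {
            return false; // No valid element size.
        }

        int esize = 1 << BitOperations.Log2((uint)combined);
        int levels = esize - 1;

        int s = immS & levels;
        int r = immR & levels;

        if (s == levels)
        {
            return false; // An all-ones element is reserved.
        }

        // Build a run of s+1 ones, rotate it right by r within the element...
        ulong emask = esize == 64 ? ulong.MaxValue : (1UL << esize) - 1;
        ulong welem = (1UL << (s + 1)) - 1;
        ulong rotated = r == 0 ? welem : ((welem >> r) | (welem << (esize - r))) & emask;

        // ...then replicate the element across all 64 bits.
        for (int i = 0; i < 64; i += esize)
        {
            wmask |= rotated << i;
        }

        return true;
    }

    // Example: immN = 0, immR = 0, immS = 0b111100 describes 2-bit elements of 0b01,
    // i.e. 0x5555555555555555.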
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeCcmp.cs b/src/ARMeilleure/Decoders/OpCodeCcmp.cs new file mode 100644 index 00000000..aa47146f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeCcmp.cs @@ -0,0 +1,32 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCodeCcmp : OpCodeAlu, IOpCodeCond + { + public int Nzcv { get; } + protected int RmImm; + + public Condition Cond { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmp(inst, address, opCode); + + public OpCodeCcmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int o3 = (opCode >> 4) & 1; + + if (o3 != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Nzcv = (opCode >> 0) & 0xf; + Cond = (Condition)((opCode >> 12) & 0xf); + RmImm = (opCode >> 16) & 0x1f; + + Rd = RegisterAlias.Zr; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs b/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs new file mode 100644 index 00000000..3548f2da --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeCcmpImm.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeCcmpImm : OpCodeCcmp, IOpCodeAluImm + { + public long Immediate => RmImm; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmpImm(inst, address, opCode); + + public OpCodeCcmpImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs b/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs new file mode 100644 index 00000000..d5df3b10 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeCcmpReg.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeCcmpReg : OpCodeCcmp, IOpCodeAluRs + { + public int Rm => RmImm; + + public int Shift => 0; + + public ShiftType ShiftType => ShiftType.Lsl; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCcmpReg(inst, address, opCode); + + public OpCodeCcmpReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) { } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeCsel.cs b/src/ARMeilleure/Decoders/OpCodeCsel.cs new file mode 100644 index 00000000..4b8dc7fd --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeCsel.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeCsel : OpCodeAlu, IOpCodeCond + { + public int Rm { get; } + + public Condition Cond { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeCsel(inst, address, opCode); + + public OpCodeCsel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 16) & 0x1f; + Cond = (Condition)((opCode >> 12) & 0xf); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeException.cs b/src/ARMeilleure/Decoders/OpCodeException.cs new file mode 100644 index 00000000..6b72138e --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeException.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeException : OpCode + { + public int Id { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeException(inst, address, opCode); + + public OpCodeException(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Id = (opCode >> 5) & 0xffff; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeMem.cs b/src/ARMeilleure/Decoders/OpCodeMem.cs new file mode 100644 index 00000000..0ba2bcd1 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeMem.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMem : OpCode + { + public int Rt { get; protected set; } + public int Rn { get; protected set; } + public int Size { get; protected set; } + public bool Extend64 { get; protected set; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMem(inst, address, opCode); + + public OpCodeMem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 0) & 0x1f; + Rn = (opCode >> 5) & 0x1f; + Size = (opCode >> 30) & 0x3; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeMemEx.cs b/src/ARMeilleure/Decoders/OpCodeMemEx.cs new file mode 100644 index 00000000..89902485 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeMemEx.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemEx : OpCodeMem + { + public int Rt2 { get; } + public int Rs { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemEx(inst, address, opCode); + + public OpCodeMemEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt2 = (opCode >> 10) & 0x1f; + Rs = (opCode >> 16) & 0x1f; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeMemImm.cs b/src/ARMeilleure/Decoders/OpCodeMemImm.cs new file mode 100644 index 00000000..d6ed2282 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeMemImm.cs @@ -0,0 +1,53 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemImm : OpCodeMem + { + public long Immediate { get; protected set; } + public bool WBack { get; protected set; } + public bool PostIdx { get; protected set; } + protected bool Unscaled { get; } + + private enum MemOp + { + Unscaled = 0, + PostIndexed = 1, + Unprivileged = 2, + PreIndexed = 3, + Unsigned + } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemImm(inst, address, opCode); + + public OpCodeMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Extend64 = ((opCode >> 22) & 3) == 2; + WBack = ((opCode >> 24) & 1) == 0; + + // The type is not valid for the 12-bit Unsigned Immediate encoding, + // because bits 11:10 are used for the larger Immediate offset. + MemOp type = WBack ? (MemOp)((opCode >> 10) & 3) : MemOp.Unsigned; + + PostIdx = type == MemOp.PostIndexed; + Unscaled = type == MemOp.Unscaled || + type == MemOp.Unprivileged; + + // Unscaled and Unprivileged don't write back, + // but they do use the 9-bit Signed Immediate. + if (Unscaled) + { + WBack = false; + } + + if (WBack || Unscaled) + { + // 9-bit Signed Immediate. + Immediate = (opCode << 11) >> 23; + } + else + { + // 12-bit Unsigned Immediate. + Immediate = ((opCode >> 10) & 0xfff) << Size; + } + } + } +}
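The two immediate layouts above can be reproduced in isolation: the 9-bit signed field lives in bits [20:12] (the left-then-right shift pair sign-extends it), and the 12-bit unsigned field lives in bits [21:10], scaled by the access size. A sketch with hypothetical helper names:

    static long DecodeSignedImm9(int opCode)
    {
        return (opCode << 11) >> 23; // move bit 20 up to the sign bit, then arithmetic-shift back
    }

    static long DecodeUnsignedImm12(int opCode, int size)
    {
        return (long)((opCode >> 10) & 0xfff) << size;
    }

    // Example: LDR X0, [X1, #-8]! uses the 9-bit form (-8); LDR X0, [X1, #16]
    // uses the 12-bit form (field value 2, scaled by the 8-byte access size).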
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeMemLit.cs b/src/ARMeilleure/Decoders/OpCodeMemLit.cs new file mode 100644 index 00000000..986d6634 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeMemLit.cs @@ -0,0 +1,28 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemLit : OpCode, IOpCodeLit + { + public int Rt { get; } + public long Immediate { get; } + public int Size { get; } + public bool Signed { get; } + public bool Prefetch { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemLit(inst, address, opCode); + + public OpCodeMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = opCode & 0x1f; + + Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode); + + switch ((opCode >> 30) & 3) + { + case 0: Size = 2; Signed = false; Prefetch = false; break; + case 1: Size = 3; Signed = false; Prefetch = false; break; + case 2: Size = 2; Signed = true; Prefetch = false; break; + case 3: Size = 0; Signed = false; Prefetch = true; break; + } + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeMemPair.cs b/src/ARMeilleure/Decoders/OpCodeMemPair.cs new file mode 100644 index 00000000..21018033 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeMemPair.cs @@ -0,0 +1,25 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemPair : OpCodeMemImm + { + public int Rt2 { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemPair(inst, address, opCode); + + public OpCodeMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt2 = (opCode >> 10) & 0x1f; + WBack = ((opCode >> 23) & 0x1) != 0; + PostIdx = ((opCode >> 23) & 0x3) == 1; + Extend64 = ((opCode >> 30) & 0x3) == 1; + Size = ((opCode >> 31) & 0x1) | 2; + + DecodeImm(opCode); + } + + protected void DecodeImm(int opCode) + { + Immediate = ((long)(opCode >> 15) << 57) >> (57 - Size); + } + } +}
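DecodeImm above is a compact sign-extend-and-scale: imm7 sits at bits [21:15], shifting it to the top of a long sign-extends it, and shifting back by (57 - Size) leaves it multiplied by the access size. A worked check (DecodePairImmediate is a hypothetical name):

    static long DecodePairImmediate(int opCode, int size)
    {
        return ((long)(opCode >> 15) << 57) >> (57 - size);
    }

    // Example: LDP X0, X1, [SP, #16] encodes imm7 = 2 with Size = 3,
    // and ((2L << 57) >> 54) == 16.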
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeMemReg.cs b/src/ARMeilleure/Decoders/OpCodeMemReg.cs new file mode 100644 index 00000000..73d6c5d2 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeMemReg.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMemReg : OpCodeMem + { + public bool Shift { get; } + public int Rm { get; } + + public IntType IntType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMemReg(inst, address, opCode); + + public OpCodeMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Shift = ((opCode >> 12) & 0x1) != 0; + IntType = (IntType)((opCode >> 13) & 0x7); + Rm = (opCode >> 16) & 0x1f; + Extend64 = ((opCode >> 22) & 0x3) == 2; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeMov.cs b/src/ARMeilleure/Decoders/OpCodeMov.cs new file mode 100644 index 00000000..50af88cb --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeMov.cs @@ -0,0 +1,38 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMov : OpCode + { + public int Rd { get; } + + public long Immediate { get; } + + public int Bit { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMov(inst, address, opCode); + + public OpCodeMov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int p1 = (opCode >> 22) & 1; + int sf = (opCode >> 31) & 1; + + if (sf == 0 && p1 != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Rd = (opCode >> 0) & 0x1f; + Immediate = (opCode >> 5) & 0xffff; + Bit = (opCode >> 21) & 0x3; + + Bit <<= 4; + + Immediate <<= Bit; + + RegisterSize = (opCode >> 31) != 0 + ? RegisterSize.Int64 + : RegisterSize.Int32; + } + } +}
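Bit above is the hw field scaled to a bit offset: the 16-bit immediate can sit at bit 0, 16, 32 or 48, and the sf/p1 check rejects the upper two positions in 32-bit mode. A sketch (DecodeMovImmediate is a hypothetical name):

    static long DecodeMovImmediate(int opCode)
    {
        long imm16 = (opCode >> 5) & 0xffff;
        int hw = (opCode >> 21) & 0x3;

        return imm16 << (hw * 16);
    }

    // Example: MOVZ X0, #0x1234, LSL #32 decodes to 0x123400000000.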
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeMul.cs b/src/ARMeilleure/Decoders/OpCodeMul.cs new file mode 100644 index 00000000..31d140a6 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeMul.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeMul : OpCodeAlu + { + public int Rm { get; } + public int Ra { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeMul(inst, address, opCode); + + public OpCodeMul(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Ra = (opCode >> 10) & 0x1f; + Rm = (opCode >> 16) & 0x1f; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimd.cs b/src/ARMeilleure/Decoders/OpCodeSimd.cs new file mode 100644 index 00000000..85713690 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimd.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimd : OpCode, IOpCodeSimd + { + public int Rd { get; } + public int Rn { get; } + public int Opc { get; } + public int Size { get; protected set; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimd(inst, address, opCode); + + public OpCodeSimd(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x1f; + Rn = (opCode >> 5) & 0x1f; + Opc = (opCode >> 15) & 0x3; + Size = (opCode >> 22) & 0x3; + + RegisterSize = ((opCode >> 30) & 1) != 0 + ? RegisterSize.Simd128 + : RegisterSize.Simd64; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs b/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs new file mode 100644 index 00000000..05b32941 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdCvt.cs @@ -0,0 +1,21 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdCvt : OpCodeSimd + { + public int FBits { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdCvt(inst, address, opCode); + + public OpCodeSimdCvt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int scale = (opCode >> 10) & 0x3f; + int sf = (opCode >> 31) & 0x1; + + FBits = 64 - scale; + + RegisterSize = sf != 0 + ? RegisterSize.Int64 + : RegisterSize.Int32; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdExt.cs b/src/ARMeilleure/Decoders/OpCodeSimdExt.cs new file mode 100644 index 00000000..a0e264d9 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdExt.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdExt : OpCodeSimdReg + { + public int Imm4 { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdExt(inst, address, opCode); + + public OpCodeSimdExt(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Imm4 = (opCode >> 11) & 0xf; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs b/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs new file mode 100644 index 00000000..aa16e0c1 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdFcond.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdFcond : OpCodeSimdReg, IOpCodeCond + { + public int Nzcv { get; } + + public Condition Cond { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdFcond(inst, address, opCode); + + public OpCodeSimdFcond(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Nzcv = (opCode >> 0) & 0xf; + Cond = (Condition)((opCode >> 12) & 0xf); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs b/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs new file mode 100644 index 00000000..9f9062b8 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdFmov.cs @@ -0,0 +1,32 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdFmov : OpCode, IOpCodeSimd + { + public int Rd { get; } + public long Immediate { get; } + public int Size { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdFmov(inst, address, opCode); + + public OpCodeSimdFmov(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int type = (opCode >> 22) & 0x3; + + Size = type; + + long imm; + + Rd = (opCode >> 0) & 0x1f; + imm = (opCode >> 13) & 0xff; + + if (type == 0) + { + Immediate = (long)DecoderHelper.Imm8ToFP32Table[(int)imm]; + } + else /* if (type == 1) */ + { + Immediate = (long)DecoderHelper.Imm8ToFP64Table[(int)imm]; + } + } + } +}
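DecoderHelper.Imm8ToFP32Table and Imm8ToFP64Table are defined elsewhere; they presumably tabulate the architectural VFPExpandImm rule (sign bit, inverted-then-replicated exponent bit, four fraction bits). A sketch of the 32-bit case under that assumption; ExpandImm8ToFP32Bits is a hypothetical name:

    static uint ExpandImm8ToFP32Bits(int imm8)
    {
        uint sign = (uint)(imm8 >> 7) & 1;
        uint b    = (uint)(imm8 >> 6) & 1;

        // Exponent: NOT(b), then b replicated five times, then imm8<5:4>.
        uint exp  = ((b ^ 1) << 7) | ((b != 0 ? 0x1fu : 0u) << 2) | ((uint)(imm8 >> 4) & 3);
        uint frac = ((uint)imm8 & 0xf) << 19; // imm8<3:0> become the top fraction bits

        return (sign << 31) | (exp << 23) | frac;
    }

    // Example: imm8 = 0x70 expands to 0x3F800000, i.e. 1.0f.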
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs b/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs new file mode 100644 index 00000000..02f74d03 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdHelper.cs @@ -0,0 +1,88 @@ +namespace ARMeilleure.Decoders +{ + public static class OpCodeSimdHelper + { + public static (long Immediate, int Size) GetSimdImmediateAndSize(int cMode, int op, long imm) + { + int modeLow = cMode & 1; + int modeHigh = cMode >> 1; + int size = 0; + + if (modeHigh == 0b111) + { + switch (op | (modeLow << 1)) + { + case 0: + // 64-bits Immediate. + // Transform abcd efgh into abcd efgh abcd efgh ... + size = 3; + imm = (long)((ulong)imm * 0x0101010101010101); + break; + + case 1: + // 64-bits Immediate. + // Transform abcd efgh into aaaa aaaa bbbb bbbb ... + size = 3; + imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4; + imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2; + imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1; + + imm = (long)((ulong)imm * 0x8040201008040201); + imm = (long)((ulong)imm & 0x8080808080808080); + + imm |= imm >> 4; + imm |= imm >> 2; + imm |= imm >> 1; + break; + + case 2: + // 2 x 32-bits floating point Immediate. + size = 3; + imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm]; + imm |= imm << 32; + break; + + case 3: + // 64-bits floating point Immediate. + size = 3; + imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm]; + break; + } + } + else if ((modeHigh & 0b110) == 0b100) + { + // 16-bits shifted Immediate. + size = 1; imm <<= (modeHigh & 1) << 3; + } + else if ((modeHigh & 0b100) == 0b000) + { + // 32-bits shifted Immediate. + size = 2; imm <<= modeHigh << 3; + } + else if ((modeHigh & 0b111) == 0b110) + { + // 32-bits shifted Immediate (fill with ones). + size = 2; imm = ShlOnes(imm, 8 << modeLow); + } + else + { + // 8-bits without shift. + size = 0; + } + + return (imm, size); + } + + private static long ShlOnes(long value, int shift) + { + if (shift != 0) + { + return value << shift | (long)(ulong.MaxValue >> (64 - shift)); + } + else + { + return value; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeSimdImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdImm.cs new file mode 100644 index 00000000..eeca7709 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdImm.cs @@ -0,0 +1,107 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdImm : OpCode, IOpCodeSimd + { + public int Rd { get; } + public long Immediate { get; } + public int Size { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdImm(inst, address, opCode); + + public OpCodeSimdImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = opCode & 0x1f; + + int cMode = (opCode >> 12) & 0xf; + int op = (opCode >> 29) & 0x1; + + int modeLow = cMode & 1; + int modeHigh = cMode >> 1; + + long imm; + + imm = ((uint)opCode >> 5) & 0x1f; + imm |= ((uint)opCode >> 11) & 0xe0; + + if (modeHigh == 0b111) + { + switch (op | (modeLow << 1)) + { + case 0: + // 64-bits Immediate. + // Transform abcd efgh into abcd efgh abcd efgh ... + Size = 3; + imm = (long)((ulong)imm * 0x0101010101010101); + break; + + case 1: + // 64-bits Immediate. + // Transform abcd efgh into aaaa aaaa bbbb bbbb ... 
+ Size = 3; + imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4; + imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2; + imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1; + + imm = (long)((ulong)imm * 0x8040201008040201); + imm = (long)((ulong)imm & 0x8080808080808080); + + imm |= imm >> 4; + imm |= imm >> 2; + imm |= imm >> 1; + break; + + case 2: + // 2 x 32-bits floating point Immediate. + Size = 0; + imm = (long)DecoderHelper.Imm8ToFP32Table[(int)imm]; + imm |= imm << 32; + break; + + case 3: + // 64-bits floating point Immediate. + Size = 1; + imm = (long)DecoderHelper.Imm8ToFP64Table[(int)imm]; + break; + } + } + else if ((modeHigh & 0b110) == 0b100) + { + // 16-bits shifted Immediate. + Size = 1; imm <<= (modeHigh & 1) << 3; + } + else if ((modeHigh & 0b100) == 0b000) + { + // 32-bits shifted Immediate. + Size = 2; imm <<= modeHigh << 3; + } + else if ((modeHigh & 0b111) == 0b110) + { + // 32-bits shifted Immediate (fill with ones). + Size = 2; imm = ShlOnes(imm, 8 << modeLow); + } + else + { + // 8-bits without shift. + Size = 0; + } + + Immediate = imm; + + RegisterSize = ((opCode >> 30) & 1) != 0 + ? RegisterSize.Simd128 + : RegisterSize.Simd64; + } + + private static long ShlOnes(long value, int shift) + { + if (shift != 0) + { + return value << shift | (long)(ulong.MaxValue >> (64 - shift)); + } + else + { + return value; + } + } + } +}
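The two 64-bit integer cases above avoid loops entirely: case 0 replicates the byte with a single multiply by 0x0101010101010101, and case 1 ("abcd efgh" into "aaaa aaaa bbbb bbbb ...") first bit-reverses the byte with three masked swaps, then lands bit k of the reversed byte on bit 7 of byte k via the multiply by 0x8040201008040201 masked with 0x8080808080808080 (the 9-bit stride between set bits in the constant keeps partial products from carrying into one another), and finally smears each byte's top bit down through the byte. The same logic standalone, for spot-checking values:

    using System;

    static class SimdImmExpandExample
    {
        // Expands bit pattern abcdefgh into aaaaaaaa bbbbbbbb ... hhhhhhhh
        // (a in the most significant byte), mirroring case 1 above.
        public static ulong SpreadBitsToBytes(int imm8)
        {
            ulong imm = (uint)(imm8 & 0xff);

            // Bit-reverse the byte: swap nibbles, then bit pairs, then bits.
            imm = (imm & 0xf0) >> 4 | (imm & 0x0f) << 4;
            imm = (imm & 0xcc) >> 2 | (imm & 0x33) << 2;
            imm = (imm & 0xaa) >> 1 | (imm & 0x55) << 1;

            // Place bit k of the reversed byte at bit 7 of byte k.
            imm = (imm * 0x8040201008040201) & 0x8080808080808080;

            // Smear each byte's top bit across the whole byte.
            imm |= imm >> 4;
            imm |= imm >> 2;
            imm |= imm >> 1;

            return imm;
        }

        public static void Main()
        {
            // 0b10110001 -> ff00ffff000000ff (bytes a..h, most significant first).
            Console.WriteLine(SpreadBitsToBytes(0b10110001).ToString("x16"));
        }
    }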
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdIns.cs b/src/ARMeilleure/Decoders/OpCodeSimdIns.cs new file mode 100644 index 00000000..f6f9249d --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdIns.cs @@ -0,0 +1,36 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdIns : OpCodeSimd + { + public int SrcIndex { get; } + public int DstIndex { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdIns(inst, address, opCode); + + public OpCodeSimdIns(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm4 = (opCode >> 11) & 0xf; + int imm5 = (opCode >> 16) & 0x1f; + + if (imm5 == 0b10000) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Size = imm5 & -imm5; + + switch (Size) + { + case 1: Size = 0; break; + case 2: Size = 1; break; + case 4: Size = 2; break; + case 8: Size = 3; break; + } + + SrcIndex = imm4 >> Size; + DstIndex = imm5 >> (Size + 1); + } + } +}
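In OpCodeSimdIns, `imm5 & -imm5` isolates the lowest set bit of imm5 and the switch turns 1/2/4/8 into a log2 element size; the indices are whatever remains of imm4/imm5 above the size bits. An equivalent loop-free formulation, handy for checking the decode (a hypothetical helper, not part of the decoder):

    using System.Numerics;

    static class SimdInsDecodeExample
    {
        // Size is log2(element bytes): 0 = byte, 1 = half, 2 = word, 3 = double.
        // Assumes imm5 != 0 and imm5 != 0b10000 (the latter is rejected as
        // undefined by the decoder before this point).
        public static (int Size, int DstIndex, int SrcIndex) Decode(int imm5, int imm4)
        {
            // Same result as `imm5 & -imm5` followed by the 1/2/4/8 switch.
            int size = BitOperations.TrailingZeroCount(imm5);

            return (size, imm5 >> (size + 1), imm4 >> size);
        }
    }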
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs new file mode 100644 index 00000000..c11594cb --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdMemImm.cs @@ -0,0 +1,28 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemImm : OpCodeMemImm, IOpCodeSimd + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemImm(inst, address, opCode); + + public OpCodeSimdMemImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size |= (opCode >> 21) & 4; + + if (Size > 4) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + // Base class already shifts the immediate, we only + // need to shift it if size (scale) is 4, since this value is only set here. + if (!WBack && !Unscaled && Size == 4) + { + Immediate <<= 4; + } + + Extend64 = false; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs new file mode 100644 index 00000000..8e212966 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdMemLit.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemLit : OpCode, IOpCodeSimd, IOpCodeLit + { + public int Rt { get; } + public long Immediate { get; } + public int Size { get; } + public bool Signed => false; + public bool Prefetch => false; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemLit(inst, address, opCode); + + public OpCodeSimdMemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int opc = (opCode >> 30) & 3; + + if (opc == 3) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Rt = opCode & 0x1f; + + Immediate = (long)address + DecoderHelper.DecodeImmS19_2(opCode); + + Size = opc + 2; + } + } +}
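The literal address above is PC-relative: under the A64 LDR (literal) encoding, DecodeImmS19_2 should be the sign-extended 19-bit field at bits [23:5] scaled by 4. The helper itself is defined elsewhere in this patch; the sketch below assumes that behavior:

    static class MemLitOffsetExample
    {
        // Sign-extends the 19-bit field at bits [23:5] and scales it by 4,
        // the A64 LDR (literal) byte offset. Assumed equivalent to the
        // DecoderHelper.DecodeImmS19_2 used above.
        public static long DecodeImmS19_2(int opCode)
        {
            long imm19 = (opCode >> 5) & 0x7ffff;

            return (imm19 << 45) >> 43; // sign-extend 19 bits, then << 2
        }
    }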
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs new file mode 100644 index 00000000..8922c18f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdMemMs.cs @@ -0,0 +1,48 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemMs : OpCodeMemReg, IOpCodeSimd + { + public int Reps { get; } + public int SElems { get; } + public int Elems { get; } + public bool WBack { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemMs(inst, address, opCode); + + public OpCodeSimdMemMs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + switch ((opCode >> 12) & 0xf) + { + case 0b0000: Reps = 1; SElems = 4; break; + case 0b0010: Reps = 4; SElems = 1; break; + case 0b0100: Reps = 1; SElems = 3; break; + case 0b0110: Reps = 3; SElems = 1; break; + case 0b0111: Reps = 1; SElems = 1; break; + case 0b1000: Reps = 1; SElems = 2; break; + case 0b1010: Reps = 2; SElems = 1; break; + + default: Instruction = InstDescriptor.Undefined; return; + } + + Size = (opCode >> 10) & 3; + WBack = ((opCode >> 23) & 1) != 0; + + bool q = ((opCode >> 30) & 1) != 0; + + if (!q && Size == 3 && SElems != 1) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Extend64 = false; + + RegisterSize = q + ? RegisterSize.Simd128 + : RegisterSize.Simd64; + + Elems = (GetBitsCount() >> 3) >> Size; + } + } +}
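For the multiple-structures forms (LD1-LD4/ST1-ST4), Reps is how many times the structure pattern repeats, SElems how many consecutive registers make up one structure, and Elems how many elements fit in each register, i.e. register bytes shifted down by Size. LD4 {v0.16b-v3.16b}, for instance, decodes to Reps = 1, SElems = 4, Size = 0, and with q set Elems = (128 >> 3) >> 0 = 16. The element-count formula in isolation:

    static class SimdMemMsExample
    {
        // Elements per register = register bytes >> size, matching
        // `Elems = (GetBitsCount() >> 3) >> Size` above.
        public static int ElemsPerRegister(bool q, int size) => ((q ? 128 : 64) >> 3) >> size;
    }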
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs new file mode 100644 index 00000000..1ab95367 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdMemPair.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemPair : OpCodeMemPair, IOpCodeSimd + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemPair(inst, address, opCode); + + public OpCodeSimdMemPair(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = ((opCode >> 30) & 3) + 2; + + Extend64 = false; + + DecodeImm(opCode); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs new file mode 100644 index 00000000..9ea6dda3 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdMemReg.cs @@ -0,0 +1,21 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemReg : OpCodeMemReg, IOpCodeSimd + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemReg(inst, address, opCode); + + public OpCodeSimdMemReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size |= (opCode >> 21) & 4; + + if (Size > 4) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + Extend64 = false; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs b/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs new file mode 100644 index 00000000..44abdd38 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdMemSs.cs @@ -0,0 +1,97 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdMemSs : OpCodeMemReg, IOpCodeSimd + { + public int SElems { get; } + public int Index { get; } + public bool Replicate { get; } + public bool WBack { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdMemSs(inst, address, opCode); + + public OpCodeSimdMemSs(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int size = (opCode >> 10) & 3; + int s = (opCode >> 12) & 1; + int sElems = (opCode >> 12) & 2; + int scale = (opCode >> 14) & 3; + int l = (opCode >> 22) & 1; + int q = (opCode >> 30) & 1; + + sElems |= (opCode >> 21) & 1; + + sElems++; + + int index = (q << 3) | (s << 2) | size; + + switch (scale) + { + case 1: + { + if ((size & 1) != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + index >>= 1; + + break; + } + + case 2: + { + if ((size & 2) != 0 || + ((size & 1) != 0 && s != 0)) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + if ((size & 1) != 0) + { + index >>= 3; + + scale = 3; + } + else + { + index >>= 2; + } + + break; + } + + case 3: + { + if (l == 0 || s != 0) + { + Instruction = InstDescriptor.Undefined; + + return; + } + + scale = size; + + Replicate = true; + + break; + } + } + + Index = index; + SElems = sElems; + Size = scale; + + Extend64 = false; + + WBack = ((opCode >> 23) & 1) != 0; + + RegisterSize = q != 0 + ? RegisterSize.Simd128 + : RegisterSize.Simd64; + } + } +}
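For the single-structure forms, the candidate index q:s:size starts out as a byte index and loses low bits as the element widens: bytes use all four bits, halfwords drop one, words drop two, and the doubleword case keeps only q. LD1 {v2.h}[5], for example, encodes index 5 as q:s:size<1> = 101b. The index reconstruction for the non-replicate cases, standalone:

    using System;

    static class SimdMemSsIndexExample
    {
        // Mirrors the index shifts above for scales 0..2 (no replicate).
        public static int ElementIndex(int q, int s, int size, int scale)
        {
            int index = (q << 3) | (s << 2) | size;

            return scale switch
            {
                0 => index,      // bytes:     q:s:size
                1 => index >> 1, // halfwords: q:s:size<1>
                2 => (size & 1) != 0 ? index >> 3  // doublewords: q
                                     : index >> 2, // words:       q:s
                _ => throw new ArgumentOutOfRangeException(nameof(scale)),
            };
        }
    }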
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdReg.cs b/src/ARMeilleure/Decoders/OpCodeSimdReg.cs new file mode 100644 index 00000000..ac4f71da --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdReg.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdReg : OpCodeSimd + { + public bool Bit3 { get; } + public int Ra { get; } + public int Rm { get; protected set; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdReg(inst, address, opCode); + + public OpCodeSimdReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Bit3 = ((opCode >> 3) & 0x1) != 0; + Ra = (opCode >> 10) & 0x1f; + Rm = (opCode >> 16) & 0x1f; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs b/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs new file mode 100644 index 00000000..92368dee --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdRegElem.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdRegElem : OpCodeSimdReg + { + public int Index { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdRegElem(inst, address, opCode); + + public OpCodeSimdRegElem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + switch (Size) + { + case 1: + Index = (opCode >> 20) & 3 | + (opCode >> 9) & 4; + + Rm &= 0xf; + + break; + + case 2: + Index = (opCode >> 21) & 1 | + (opCode >> 10) & 2; + + break; + + default: Instruction = InstDescriptor.Undefined; break; + } + } + } +}
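By-element operations assemble the index from the H, L, and M bits of the encoding: 16-bit elements use H:L:M and restrict Rm to Vm<3:0> so that M can double as an index bit, while 32-bit elements use H:L. A sketch with the architectural bit names (which the decoder above extracts from opCode bits 11, 21, and 20):

    static class RegElemIndexExample
    {
        // v2.h[5], say, needs index 5 = 0b101, i.e. H = 1, L = 0, M = 1.
        public static int Index16(int h, int l, int m) => (h << 2) | (l << 1) | m;
        public static int Index32(int h, int l) => (h << 1) | l;
    }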
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs b/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs new file mode 100644 index 00000000..d46dd57e --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdRegElemF.cs @@ -0,0 +1,33 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdRegElemF : OpCodeSimdReg + { + public int Index { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdRegElemF(inst, address, opCode); + + public OpCodeSimdRegElemF(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + switch ((opCode >> 21) & 3) // sz:L + { + case 0: // H:0 + Index = (opCode >> 10) & 2; // 0, 2 + + break; + + case 1: // H:1 + Index = (opCode >> 10) & 2; + Index++; // 1, 3 + + break; + + case 2: // H + Index = (opCode >> 11) & 1; // 0, 1 + + break; + + default: Instruction = InstDescriptor.Undefined; break; + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs b/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs new file mode 100644 index 00000000..7064f1d2 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdShImm.cs @@ -0,0 +1,18 @@ +using ARMeilleure.Common; + +namespace ARMeilleure.Decoders +{ + class OpCodeSimdShImm : OpCodeSimd + { + public int Imm { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdShImm(inst, address, opCode); + + public OpCodeSimdShImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Imm = (opCode >> 16) & 0x7f; + + Size = BitUtils.HighestBitSetNibble(Imm >> 3); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs b/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs new file mode 100644 index 00000000..9c631e48 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSimdTbl.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSimdTbl : OpCodeSimdReg + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSimdTbl(inst, address, opCode); + + public OpCodeSimdTbl(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Size = ((opCode >> 13) & 3) + 1; + } + } +}
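OpCodeSimdShImm keeps the raw immh:immb field in Imm; the element size is the position of the highest set bit of immh = Imm >> 3, and the emitters derive the actual shift distance from it: right shifts use 2*esize - Imm and left shifts Imm - esize, with esize = 8 << Size. SSHR #3 on halfwords, for example, encodes Imm = 29: immh = 0b0011 gives Size = 1, and 32 - 29 = 3. The arithmetic in isolation (helper names here are illustrative, not ARMeilleure's):

    static class SimdShiftImmExample
    {
        // esize = 8 << size; standard A64 SIMD shift-immediate arithmetic.
        public static int ShiftRight(int imm, int size) => (8 << (size + 1)) - imm;
        public static int ShiftLeft(int imm, int size) => imm - (8 << size);
    }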
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeSystem.cs b/src/ARMeilleure/Decoders/OpCodeSystem.cs new file mode 100644 index 00000000..4d79421a --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeSystem.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeSystem : OpCode + { + public int Rt { get; } + public int Op2 { get; } + public int CRm { get; } + public int CRn { get; } + public int Op1 { get; } + public int Op0 { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeSystem(inst, address, opCode); + + public OpCodeSystem(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 0) & 0x1f; + Op2 = (opCode >> 5) & 0x7; + CRm = (opCode >> 8) & 0xf; + CRn = (opCode >> 12) & 0xf; + Op1 = (opCode >> 16) & 0x7; + Op0 = ((opCode >> 19) & 0x1) | 2; + } + } +}
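OpCodeSystem splits the MRS/MSR/SYS encodings into the canonical op0:op1:CRn:CRm:op2 tuple; op0 comes out as 2 or 3 because bit 19 carries only its low bit. As a concrete data point, MRS x0, TPIDR_EL0 decodes to op0 = 3, op1 = 3, CRn = 13, CRm = 0, op2 = 2, Rt = 0. A decode sketch mirroring the field extraction above:

    static class SysRegFieldsExample
    {
        // Field layout of A64 system instructions, as extracted above.
        public static (int Op0, int Op1, int CRn, int CRm, int Op2, int Rt) Decode(int opCode) =>
            (((opCode >> 19) & 1) | 2,
             (opCode >> 16) & 0x7,
             (opCode >> 12) & 0xf,
             (opCode >> 8) & 0xf,
             (opCode >> 5) & 0x7,
             opCode & 0x1f);
    }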
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT16.cs b/src/ARMeilleure/Decoders/OpCodeT16.cs new file mode 100644 index 00000000..9c3d6b00 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16 : OpCode32 + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16(inst, address, opCode); + + public OpCodeT16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Cond = Condition.Al; + + IsThumb = true; + OpCodeSizeInBytes = 2; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs new file mode 100644 index 00000000..95f18054 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubImm3.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16AddSubImm3: OpCodeT16, IOpCode32AluImm + { + public int Rd { get; } + public int Rn { get; } + + public bool? SetFlags => null; + + public int Immediate { get; } + + public bool IsRotated { get; } + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubImm3(inst, address, opCode); + + public OpCodeT16AddSubImm3(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x7; + Rn = (opCode >> 3) & 0x7; + Immediate = (opCode >> 6) & 0x7; + IsRotated = false; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs new file mode 100644 index 00000000..2a407b2d --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubReg.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16AddSubReg : OpCodeT16, IOpCode32AluReg + { + public int Rm { get; } + public int Rd { get; } + public int Rn { get; } + + public bool? SetFlags => null; + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubReg(inst, address, opCode); + + public OpCodeT16AddSubReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x7; + Rn = (opCode >> 3) & 0x7; + Rm = (opCode >> 6) & 0x7; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs b/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs new file mode 100644 index 00000000..b66fe0cd --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16AddSubSp.cs @@ -0,0 +1,23 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCodeT16AddSubSp : OpCodeT16, IOpCode32AluImm + { + public int Rd => RegisterAlias.Aarch32Sp; + public int Rn => RegisterAlias.Aarch32Sp; + + public bool? 
SetFlags => false; + + public int Immediate { get; } + + public bool IsRotated => false; + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AddSubSp(inst, address, opCode); + + public OpCodeT16AddSubSp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = ((opCode >> 0) & 0x7f) << 2; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16Adr.cs b/src/ARMeilleure/Decoders/OpCodeT16Adr.cs new file mode 100644 index 00000000..03abd499 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16Adr.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16Adr : OpCodeT16, IOpCode32Adr + { + public int Rd { get; } + + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16Adr(inst, address, opCode); + + public OpCodeT16Adr(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 8) & 7; + + int imm = (opCode & 0xff) << 2; + Immediate = (int)(GetPc() & 0xfffffffc) + imm; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs b/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs new file mode 100644 index 00000000..673a4604 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16AluImm8.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16AluImm8 : OpCodeT16, IOpCode32AluImm + { + public int Rd { get; } + public int Rn { get; } + + public bool? SetFlags => null; + + public int Immediate { get; } + + public bool IsRotated { get; } + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluImm8(inst, address, opCode); + + public OpCodeT16AluImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 8) & 0x7; + Rn = (opCode >> 8) & 0x7; + Immediate = (opCode >> 0) & 0xff; + IsRotated = false; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs b/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs new file mode 100644 index 00000000..b23f8fe0 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16AluImmZero.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16AluImmZero : OpCodeT16, IOpCode32AluImm + { + public int Rd { get; } + public int Rn { get; } + + public bool? SetFlags => null; + + public int Immediate { get; } + + public bool IsRotated { get; } + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluImmZero(inst, address, opCode); + + public OpCodeT16AluImmZero(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x7; + Rn = (opCode >> 3) & 0x7; + Immediate = 0; + IsRotated = false; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs b/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs new file mode 100644 index 00000000..6d5ac8fd --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16AluRegHigh.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16AluRegHigh : OpCodeT16, IOpCode32AluReg + { + public int Rm { get; } + public int Rd { get; } + public int Rn { get; } + + public bool? 
SetFlags => false; + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluRegHigh(inst, address, opCode); + + public OpCodeT16AluRegHigh(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = ((opCode >> 0) & 0x7) | ((opCode >> 4) & 0x8); + Rn = ((opCode >> 0) & 0x7) | ((opCode >> 4) & 0x8); + Rm = (opCode >> 3) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs b/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs new file mode 100644 index 00000000..b37b4f66 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16AluRegLow.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16AluRegLow : OpCodeT16, IOpCode32AluReg + { + public int Rm { get; } + public int Rd { get; } + public int Rn { get; } + + public bool? SetFlags => null; + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluRegLow(inst, address, opCode); + + public OpCodeT16AluRegLow(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x7; + Rn = (opCode >> 0) & 0x7; + Rm = (opCode >> 3) & 0x7; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs b/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs new file mode 100644 index 00000000..11d3a8fe --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16AluUx.cs @@ -0,0 +1,22 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16AluUx : OpCodeT16, IOpCode32AluUx + { + public int Rm { get; } + public int Rd { get; } + public int Rn { get; } + + public bool? SetFlags => false; + + public int RotateBits => 0; + public bool Add => false; + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16AluUx(inst, address, opCode); + + public OpCodeT16AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x7; + Rm = (opCode >> 3) & 0x7; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs b/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs new file mode 100644 index 00000000..f230b20e --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16BImm11.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16BImm11 : OpCodeT16, IOpCode32BImm + { + public long Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImm11(inst, address, opCode); + + public OpCodeT16BImm11(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm = (opCode << 21) >> 20; + Immediate = GetPc() + imm; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs b/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs new file mode 100644 index 00000000..5f684298 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16BImm8.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16BImm8 : OpCodeT16, IOpCode32BImm + { + public long Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImm8(inst, address, opCode); + + public OpCodeT16BImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Cond = (Condition)((opCode >> 8) & 0xf); + + int imm = (opCode << 24) >> 23; + Immediate = GetPc() + imm; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs b/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs new file mode 100644 index 00000000..68ebac75 --- /dev/null +++ 
b/src/ARMeilleure/Decoders/OpCodeT16BImmCmp.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16BImmCmp : OpCodeT16, IOpCode32BImm + { + public int Rn { get; } + + public long Immediate { get; } + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BImmCmp(inst, address, opCode); + + public OpCodeT16BImmCmp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 0) & 0x7; + + int imm = ((opCode >> 2) & 0x3e) | ((opCode >> 3) & 0x40); + Immediate = (int)GetPc() + imm; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16BReg.cs b/src/ARMeilleure/Decoders/OpCodeT16BReg.cs new file mode 100644 index 00000000..3122cd07 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16BReg.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16BReg : OpCodeT16, IOpCode32BReg + { + public int Rm { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16BReg(inst, address, opCode); + + public OpCodeT16BReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 3) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16Exception.cs b/src/ARMeilleure/Decoders/OpCodeT16Exception.cs new file mode 100644 index 00000000..bb005083 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16Exception.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16Exception : OpCodeT16, IOpCode32Exception + { + public int Id { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16Exception(inst, address, opCode); + + public OpCodeT16Exception(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Id = opCode & 0xFF; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs b/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs new file mode 100644 index 00000000..8c3de689 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16IfThen.cs @@ -0,0 +1,33 @@ +using System.Collections.Generic; + +namespace ARMeilleure.Decoders +{ + class OpCodeT16IfThen : OpCodeT16 + { + public Condition[] IfThenBlockConds { get; } + + public int IfThenBlockSize { get { return IfThenBlockConds.Length; } } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16IfThen(inst, address, opCode); + + public OpCodeT16IfThen(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + List<Condition> conds = new(); + + int cond = (opCode >> 4) & 0xf; + int mask = opCode & 0xf; + + conds.Add((Condition)cond); + + while ((mask & 7) != 0) + { + int newLsb = (mask >> 3) & 1; + cond = (cond & 0xe) | newLsb; + mask <<= 1; + conds.Add((Condition)cond); + } + + IfThenBlockConds = conds.ToArray(); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs b/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs new file mode 100644 index 00000000..20ef31e2 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16MemImm5.cs @@ -0,0 +1,58 @@ +using ARMeilleure.Instructions; +using System; + +namespace ARMeilleure.Decoders +{ + class OpCodeT16MemImm5 : OpCodeT16, IOpCode32Mem + { + public int Rt { get; } + public int Rn { get; } + + public bool WBack => false; + public bool IsLoad { get; } + public bool Index => true; + public bool Add => true; + + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new 
OpCodeT16MemImm5(inst, address, opCode); + + public OpCodeT16MemImm5(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 0) & 7; + Rn = (opCode >> 3) & 7; + + switch (inst.Name) + { + case InstName.Ldr: + case InstName.Ldrb: + case InstName.Ldrh: + IsLoad = true; + break; + case InstName.Str: + case InstName.Strb: + case InstName.Strh: + IsLoad = false; + break; + } + + switch (inst.Name) + { + case InstName.Str: + case InstName.Ldr: + Immediate = ((opCode >> 6) & 0x1f) << 2; + break; + case InstName.Strb: + case InstName.Ldrb: + Immediate = ((opCode >> 6) & 0x1f); + break; + case InstName.Strh: + case InstName.Ldrh: + Immediate = ((opCode >> 6) & 0x1f) << 1; + break; + default: + throw new InvalidOperationException(); + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs b/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs new file mode 100644 index 00000000..f8c16e29 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16MemLit.cs @@ -0,0 +1,26 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCodeT16MemLit : OpCodeT16, IOpCode32Mem + { + public int Rt { get; } + public int Rn => RegisterAlias.Aarch32Pc; + + public bool WBack => false; + public bool IsLoad => true; + public bool Index => true; + public bool Add => true; + + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemLit(inst, address, opCode); + + public OpCodeT16MemLit(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 8) & 7; + + Immediate = (opCode & 0xff) << 2; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs b/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs new file mode 100644 index 00000000..f4185cfc --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16MemMult.cs @@ -0,0 +1,34 @@ +using ARMeilleure.Instructions; +using System; +using System.Numerics; + +namespace ARMeilleure.Decoders +{ + class OpCodeT16MemMult : OpCodeT16, IOpCode32MemMult + { + public int Rn { get; } + public int RegisterMask { get; } + public int PostOffset { get; } + public bool IsLoad { get; } + public int Offset { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemMult(inst, address, opCode); + + public OpCodeT16MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + RegisterMask = opCode & 0xff; + Rn = (opCode >> 8) & 7; + + int regCount = BitOperations.PopCount((uint)RegisterMask); + + Offset = 0; + PostOffset = 4 * regCount; + IsLoad = inst.Name switch + { + InstName.Ldm => true, + InstName.Stm => false, + _ => throw new InvalidOperationException() + }; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs b/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs new file mode 100644 index 00000000..71100112 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16MemReg.cs @@ -0,0 +1,27 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16MemReg : OpCodeT16, IOpCode32MemReg + { + public int Rm { get; } + public int Rt { get; } + public int Rn { get; } + + public bool WBack => false; + public bool IsLoad { get; } + public bool Index => true; + public bool Add => true; + + public int Immediate => throw new System.InvalidOperationException(); + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemReg(inst, address, opCode); + + public OpCodeT16MemReg(InstDescriptor inst, ulong 
address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 0) & 7; + Rn = (opCode >> 3) & 7; + Rm = (opCode >> 6) & 7; + + IsLoad = ((opCode >> 9) & 7) >= 3; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs b/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs new file mode 100644 index 00000000..a038b915 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16MemSp.cs @@ -0,0 +1,28 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCodeT16MemSp : OpCodeT16, IOpCode32Mem + { + public int Rt { get; } + public int Rn => RegisterAlias.Aarch32Sp; + + public bool WBack => false; + public bool IsLoad { get; } + public bool Index => true; + public bool Add => true; + + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemSp(inst, address, opCode); + + public OpCodeT16MemSp(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 8) & 7; + + IsLoad = ((opCode >> 11) & 1) != 0; + + Immediate = ((opCode >> 0) & 0xff) << 2; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs b/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs new file mode 100644 index 00000000..9d7b0d20 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16MemStack.cs @@ -0,0 +1,42 @@ +using ARMeilleure.Instructions; +using ARMeilleure.State; +using System; +using System.Numerics; + +namespace ARMeilleure.Decoders +{ + class OpCodeT16MemStack : OpCodeT16, IOpCode32MemMult + { + public int Rn => RegisterAlias.Aarch32Sp; + public int RegisterMask { get; } + public int PostOffset { get; } + public bool IsLoad { get; } + public int Offset { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16MemStack(inst, address, opCode); + + public OpCodeT16MemStack(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int extra = (opCode >> 8) & 1; + int regCount = BitOperations.PopCount((uint)opCode & 0x1ff); + + switch (inst.Name) + { + case InstName.Push: + RegisterMask = (opCode & 0xff) | (extra << 14); + IsLoad = false; + Offset = -4 * regCount; + PostOffset = -4 * regCount; + break; + case InstName.Pop: + RegisterMask = (opCode & 0xff) | (extra << 15); + IsLoad = true; + Offset = 0; + PostOffset = 4 * regCount; + break; + default: + throw new InvalidOperationException(); + } + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs b/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs new file mode 100644 index 00000000..a540026e --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16ShiftImm.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16ShiftImm : OpCodeT16, IOpCode32AluRsImm + { + public int Rd { get; } + public int Rn { get; } + public int Rm { get; } + + public int Immediate { get; } + public ShiftType ShiftType { get; } + + public bool? 
SetFlags => null; + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16ShiftImm(inst, address, opCode); + + public OpCodeT16ShiftImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0x7; + Rm = (opCode >> 3) & 0x7; + Immediate = (opCode >> 6) & 0x1F; + ShiftType = (ShiftType)((opCode >> 11) & 3); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs b/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs new file mode 100644 index 00000000..9f898281 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16ShiftReg.cs @@ -0,0 +1,27 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT16ShiftReg : OpCodeT16, IOpCode32AluRsReg + { + public int Rm { get; } + public int Rs { get; } + public int Rd { get; } + + public int Rn { get; } + + public ShiftType ShiftType { get; } + + public bool? SetFlags => null; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16ShiftReg(inst, address, opCode); + + public OpCodeT16ShiftReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 7; + Rm = (opCode >> 0) & 7; + Rn = (opCode >> 3) & 7; + Rs = (opCode >> 3) & 7; + + ShiftType = (ShiftType)(((opCode >> 6) & 1) | ((opCode >> 7) & 2)); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs b/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs new file mode 100644 index 00000000..d737f5bd --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT16SpRel.cs @@ -0,0 +1,24 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCodeT16SpRel : OpCodeT16, IOpCode32AluImm + { + public int Rd { get; } + public int Rn => RegisterAlias.Aarch32Sp; + + public bool? SetFlags => false; + + public int Immediate { get; } + + public bool IsRotated => false; + + public static new OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT16SpRel(inst, address, opCode); + + public OpCodeT16SpRel(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 8) & 0x7; + Immediate = ((opCode >> 0) & 0xff) << 2; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32.cs b/src/ARMeilleure/Decoders/OpCodeT32.cs new file mode 100644 index 00000000..cf43d429 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32 : OpCode32 + { + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32(inst, address, opCode); + + public OpCodeT32(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Cond = Condition.Al; + + IsThumb = true; + OpCodeSizeInBytes = 4; + } + } +}
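Looking back at OpCodeT16IfThen above: the loop walks the IT mask, appending one condition per remaining instruction in the block and replacing the condition's low bit with the next mask bit each step. For "ITTE EQ" (firstcond = 0b0000, mask = 0b0110) this yields the list EQ, EQ, NE. The same loop standalone, for tracing:

    using System;
    using System.Collections.Generic;

    static class ItBlockExample
    {
        // Recovers an IT block's condition list from firstcond and mask,
        // mirroring the constructor of OpCodeT16IfThen.
        public static int[] Conditions(int cond, int mask)
        {
            var conds = new List<int> { cond };

            while ((mask & 7) != 0)
            {
                cond = (cond & 0xe) | ((mask >> 3) & 1);
                mask <<= 1;
                conds.Add(cond);
            }

            return conds.ToArray();
        }

        public static void Main()
        {
            // "ITTE EQ": prints 0, 0, 1 (EQ = 0, NE = 1).
            Console.WriteLine(string.Join(", ", Conditions(0b0000, 0b0110)));
        }
    }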
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32Alu.cs b/src/ARMeilleure/Decoders/OpCodeT32Alu.cs new file mode 100644 index 00000000..a81b3b3d --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32Alu.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32Alu : OpCodeT32, IOpCode32Alu + { + public int Rd { get; } + public int Rn { get; } + + public bool? SetFlags { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32Alu(inst, address, opCode); + + public OpCodeT32Alu(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 8) & 0xf; + Rn = (opCode >> 16) & 0xf; + + SetFlags = ((opCode >> 20) & 1) != 0; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs b/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs new file mode 100644 index 00000000..57ad422f --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32AluBf.cs @@ -0,0 +1,22 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32AluBf : OpCodeT32, IOpCode32AluBf + { + public int Rd { get; } + public int Rn { get; } + + public int Msb { get; } + public int Lsb { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluBf(inst, address, opCode); + + public OpCodeT32AluBf(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 8) & 0xf; + Rn = (opCode >> 16) & 0xf; + + Msb = (opCode >> 0) & 0x1f; + Lsb = ((opCode >> 6) & 0x3) | ((opCode >> 10) & 0x1c); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs b/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs new file mode 100644 index 00000000..0895c29b --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32AluImm.cs @@ -0,0 +1,38 @@ +using ARMeilleure.Common; +using System.Runtime.Intrinsics; + +namespace ARMeilleure.Decoders +{ + class OpCodeT32AluImm : OpCodeT32Alu, IOpCode32AluImm + { + public int Immediate { get; } + + public bool IsRotated { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluImm(inst, address, opCode); + + private static readonly Vector128<int> _factor = Vector128.Create(1, 0x00010001, 0x01000100, 0x01010101); + + public OpCodeT32AluImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + int imm8 = (opCode >> 0) & 0xff; + int imm3 = (opCode >> 12) & 7; + int imm1 = (opCode >> 26) & 1; + + int imm12 = imm8 | (imm3 << 8) | (imm1 << 11); + + if ((imm12 >> 10) == 0) + { + Immediate = imm8 * _factor.GetElement((imm12 >> 8) & 3); + IsRotated = false; + } + else + { + int shift = imm12 >> 7; + + Immediate = BitUtils.RotateRight(0x80 | (imm12 & 0x7f), shift, 32); + IsRotated = shift != 0; + } + } + } +}
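OpCodeT32AluImm is ThumbExpandImm: when imm12<11:10> is zero, imm8 is replicated according to imm12<9:8> (the Vector128 lookup above just holds the four multipliers 1, 0x00010001, 0x01000100, 0x01010101), otherwise 0x80|imm12<6:0> is rotated right by imm12<11:7>. The same expansion without the vector table; Expand(0x12A), for example, gives 0x002A002A:

    static class ThumbExpandImmExample
    {
        // ThumbExpandImm: replicate patterns for imm12<11:10> == 0,
        // rotated 0x80|imm12<6:0> otherwise.
        public static uint Expand(int imm12)
        {
            uint imm8 = (uint)(imm12 & 0xff);

            if ((imm12 >> 10) == 0)
            {
                return ((imm12 >> 8) & 3) switch
                {
                    0 => imm8,               // 0x000000XY
                    1 => imm8 * 0x00010001u, // 0x00XY00XY
                    2 => imm8 * 0x01000100u, // 0xXY00XY00
                    _ => imm8 * 0x01010101u, // 0xXYXYXYXY
                };
            }

            uint value = 0x80u | (uint)(imm12 & 0x7f);
            int shift = imm12 >> 7;

            return (value >> shift) | (value << (32 - shift));
        }
    }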
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs b/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs new file mode 100644 index 00000000..31de63dd --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32AluImm12.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32AluImm12 : OpCodeT32Alu, IOpCode32AluImm + { + public int Immediate { get; } + + public bool IsRotated => false; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluImm12(inst, address, opCode); + + public OpCodeT32AluImm12(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (opCode & 0xff) | ((opCode >> 4) & 0x700) | ((opCode >> 15) & 0x800); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs b/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs new file mode 100644 index 00000000..6cb604da --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32AluMla.cs @@ -0,0 +1,29 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32AluMla : OpCodeT32, IOpCode32AluMla + { + public int Rn { get; } + public int Rm { get; } + public int Ra { get; } + public int Rd { get; } + + public bool NHigh { get; } + public bool MHigh { get; } + public bool R { get; } + public bool? SetFlags => false; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluMla(inst, address, opCode); + + public OpCodeT32AluMla(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Rd = (opCode >> 8) & 0xf; + Ra = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + R = (opCode & (1 << 4)) != 0; + + MHigh = ((opCode >> 4) & 0x1) == 1; + NHigh = ((opCode >> 5) & 0x1) == 1; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs b/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs new file mode 100644 index 00000000..a487f55a --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32AluReg.cs @@ -0,0 +1,14 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32AluReg : OpCodeT32Alu, IOpCode32AluReg + { + public int Rm { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluReg(inst, address, opCode); + + public OpCodeT32AluReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs b/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs new file mode 100644 index 00000000..1c9ba7a2 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32AluRsImm.cs @@ -0,0 +1,20 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32AluRsImm : OpCodeT32Alu, IOpCode32AluRsImm + { + public int Rm { get; } + public int Immediate { get; } + + public ShiftType ShiftType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluRsImm(inst, address, opCode); + + public OpCodeT32AluRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Immediate = ((opCode >> 6) & 3) | ((opCode >> 10) & 0x1c); + + ShiftType = (ShiftType)((opCode >> 4) & 3); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs b/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs new file mode 100644 index 00000000..a1b2e612 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32AluUmull.cs @@ -0,0 +1,28 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32AluUmull : OpCodeT32, IOpCode32AluUmull + { + public int RdLo { get; } + public int RdHi { get; } + public int Rn { get; } + public int Rm { get; } + + public bool NHigh { get; } + public bool MHigh { get; } + + public bool? SetFlags => false; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluUmull(inst, address, opCode); + + public OpCodeT32AluUmull(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + RdHi = (opCode >> 8) & 0xf; + RdLo = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + MHigh = ((opCode >> 4) & 0x1) == 1; + NHigh = ((opCode >> 5) & 0x1) == 1; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs b/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs new file mode 100644 index 00000000..861dc904 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32AluUx.cs @@ -0,0 +1,18 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Decoders +{ + class OpCodeT32AluUx : OpCodeT32AluReg, IOpCode32AluUx + { + public int Rotate { get; } + public int RotateBits => Rotate * 8; + public bool Add => Rn != RegisterAlias.Aarch32Pc; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32AluUx(inst, address, opCode); + + public OpCodeT32AluUx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rotate = (opCode >> 4) & 0x3; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs b/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs new file mode 100644 index 00000000..b6da8abd --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32BImm20.cs @@ -0,0 +1,27 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32BImm20 : OpCodeT32, IOpCode32BImm + { + public long Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32BImm20(inst, address, opCode); + + public OpCodeT32BImm20(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + uint pc = GetPc(); + + int imm11 = (opCode >> 0) & 0x7ff; + int j2 = (opCode >> 11) & 1; + int j1 = (opCode >> 13) & 1; + int imm6 = (opCode >> 16) & 0x3f; + int s = (opCode >> 26) & 1; + + int imm32 = imm11 | (imm6 << 11) | (j1 << 17) | (j2 << 18) | (s << 19); + imm32 = (imm32 << 13) >> 12; + + Immediate = pc + imm32; + + Cond = (Condition)((opCode >> 22) & 0xf); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs b/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs new file mode 100644 index 00000000..774ec3a6 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32BImm24.cs @@ -0,0 +1,35 @@ +using ARMeilleure.Instructions; + +namespace ARMeilleure.Decoders +{ + class OpCodeT32BImm24 : OpCodeT32, IOpCode32BImm + { + public long Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32BImm24(inst, address, opCode); + + public OpCodeT32BImm24(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + uint pc = GetPc(); + + if (inst.Name == InstName.Blx) + { + pc &= ~3u; + } + + int imm11 = (opCode >> 0) & 0x7ff; + int j2 = (opCode >> 11) & 1; + int j1 = (opCode >> 13) & 1; + int imm10 = (opCode >> 16) & 0x3ff; + int s = (opCode >> 26) & 1; + + int i1 = j1 ^ s ^ 1; + int i2 = j2 ^ s ^ 1; + + int imm32 = imm11 | (imm10 << 11) | (i2 << 21) | (i1 << 22) | (s << 23); + imm32 = (imm32 << 8) >> 7; + + Immediate = pc + imm32; + } + } +}
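In the T32 BL/B.W encodings the offset's two intermediate top bits are stored inverted relative to the sign bit (J1 = NOT(I1 EOR S), J2 = NOT(I2 EOR S)), which is exactly what i1 = j1 ^ s ^ 1 and i2 = j2 ^ s ^ 1 undo. The offset reassembly in isolation:

    static class T32Branch24Example
    {
        // Rebuilds the signed byte offset from the BL/B.W immediate fields,
        // matching the decoder above.
        public static int Offset(int s, int j1, int j2, int imm10, int imm11)
        {
            int i1 = j1 ^ s ^ 1;
            int i2 = j2 ^ s ^ 1;

            int imm32 = imm11 | (imm10 << 11) | (i2 << 21) | (i1 << 22) | (s << 23);

            // Sign-extend 24 bits and double: the encoding counts halfwords.
            return (imm32 << 8) >> 7;
        }
    }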
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs new file mode 100644 index 00000000..7838604b --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm12.cs @@ -0,0 +1,25 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32MemImm12 : OpCodeT32, IOpCode32Mem + { + public int Rt { get; } + public int Rn { get; } + public bool WBack => false; + public bool IsLoad { get; } + public bool Index => true; + public bool Add => true; + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm12(inst, address, opCode); + + public OpCodeT32MemImm12(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + Immediate = opCode & 0xfff; + + IsLoad = ((opCode >> 20) & 1) != 0; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs new file mode 100644 index 00000000..d8b7763c --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm8.cs @@ -0,0 +1,29 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32MemImm8 : OpCodeT32, IOpCode32Mem + { + public int Rt { get; } + public int Rn { get; } + public bool WBack { get; } + public bool IsLoad { get; } + public bool Index { get; } + public bool Add { get; } + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm8(inst, address, opCode); + + public OpCodeT32MemImm8(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + Index = ((opCode >> 10) & 1) != 0; + Add = ((opCode >> 9) & 1) != 0; + WBack = ((opCode >> 8) & 1) != 0; + + Immediate = opCode & 0xff; + + IsLoad = ((opCode >> 20) & 1) != 0; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs b/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs new file mode 100644 index 00000000..7a078c48 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32MemImm8D.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32MemImm8D : OpCodeT32, IOpCode32Mem + { + public int Rt { get; } + public int Rt2 { get; } + public int Rn { get; } + public bool WBack { get; } + public bool IsLoad { get; } + public bool Index { get; } + public bool Add { get; } + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemImm8D(inst, address, opCode); + + public OpCodeT32MemImm8D(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt2 = (opCode >> 8) & 0xf; + Rt = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + Index = ((opCode >> 24) & 1) != 0; + Add = ((opCode >> 23) & 1) != 0; + WBack = ((opCode >> 21) & 1) != 0; + + Immediate = (opCode & 0xff) << 2; + + IsLoad = ((opCode >> 20) & 1) != 0; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs b/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs new file mode 100644 index 00000000..c8eb36b3 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32MemLdEx.cs @@ -0,0 +1,26 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32MemLdEx : OpCodeT32, IOpCode32MemEx + { + public int Rd => 0; + public int Rt { get; } + public int Rt2 { get; } + public int Rn { get; } + + public bool WBack => false; + public bool IsLoad => true; + public bool Index => false; + public bool Add => false; + + public int Immediate => 0; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemLdEx(inst, address, opCode); + + public OpCodeT32MemLdEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rt2 = (opCode >> 8) & 0xf; + Rt = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs b/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs new file mode 100644 index 00000000..a9ba306d --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32MemMult.cs @@ -0,0 +1,52 @@ +using System.Numerics; + +namespace ARMeilleure.Decoders +{ + class OpCodeT32MemMult : OpCodeT32, IOpCode32MemMult + { + public int Rn { get; } + + public int RegisterMask { get; } + public int Offset { get; } + public int PostOffset { get; } + + public bool IsLoad { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemMult(inst, address, opCode); + + public OpCodeT32MemMult(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rn = (opCode >> 16) & 0xf; + + bool isLoad = (opCode & (1 << 20)) != 0; + bool w = (opCode & (1 << 21)) != 0; + bool u = (opCode & (1 << 23)) != 0; + bool p = (opCode & (1 << 24)) != 0; + + RegisterMask = opCode & 0xffff; + + int regsSize = BitOperations.PopCount((uint)RegisterMask) * 4; + + if (!u) + { + Offset -= regsSize; + } + + if (u == p) + { + Offset += 4; + } + + if (w) + { + PostOffset = u ? regsSize : -regsSize; + } + else + { + PostOffset = 0; + } + + IsLoad = isLoad; + } + } +}
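OpCodeT32MemMult reduces the P/U/W bits of LDM/STM to a first-access offset plus a writeback delta: descending variants start regsSize below Rn, and the u == p adjustment covers increment-before (start at Rn + 4) and decrement-after (end at Rn). STMDB sp!, {...} (push) gets Offset = PostOffset = -regsSize, while LDMIA sp!, {...} (pop) gets Offset = 0 and PostOffset = +regsSize. The same mapping, standalone:

    static class MemMultOffsetsExample
    {
        // regsSize = 4 * popcount(register list).
        public static (int Offset, int PostOffset) Offsets(bool p, bool u, bool w, int regsSize)
        {
            int offset = 0;

            if (!u)
            {
                offset -= regsSize; // descending: start below Rn
            }

            if (u == p)
            {
                offset += 4; // IB starts at Rn + 4; DA ends at Rn
            }

            return (offset, w ? (u ? regsSize : -regsSize) : 0);
        }
    }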
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs b/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs new file mode 100644 index 00000000..056d3b46 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32MemRsImm.cs @@ -0,0 +1,30 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32MemRsImm : OpCodeT32, IOpCode32MemRsImm + { + public int Rt { get; } + public int Rn { get; } + public int Rm { get; } + public ShiftType ShiftType => ShiftType.Lsl; + + public bool WBack => false; + public bool IsLoad { get; } + public bool Index => true; + public bool Add => true; + + public int Immediate { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemRsImm(inst, address, opCode); + + public OpCodeT32MemRsImm(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Rt = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + + IsLoad = (opCode & (1 << 20)) != 0; + + Immediate = (opCode >> 4) & 3; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs b/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs new file mode 100644 index 00000000..6a0a6bb1 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32MemStEx.cs @@ -0,0 +1,27 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32MemStEx : OpCodeT32, IOpCode32MemEx + { + public int Rd { get; } + public int Rt { get; } + public int Rt2 { get; } + public int Rn { get; } + + public bool WBack => false; + public bool IsLoad => false; + public bool Index => false; + public bool Add => false; + + public int Immediate => 0; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MemStEx(inst, address, opCode); + + public OpCodeT32MemStEx(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rd = (opCode >> 0) & 0xf; + Rt2 = (opCode >> 8) & 0xf; + Rt = (opCode >> 12) & 0xf; + Rn = (opCode >> 16) & 0xf; + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs b/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs new file mode 100644 index 00000000..5161892b --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32MovImm16.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32MovImm16 : OpCodeT32Alu, IOpCode32AluImm16 + { + public int Immediate { get; } + + public bool IsRotated => false; + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32MovImm16(inst, address, opCode); + + public OpCodeT32MovImm16(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Immediate = (opCode & 0xff) | ((opCode >> 4) & 0x700) | ((opCode >> 15) & 0x800) | ((opCode >> 4) & 0xf000); + } + } +}
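MOVW/MOVT carry a full 16-bit immediate scattered across the encoding as imm4:i:imm3:imm8, which the Immediate expression above gathers back together; 0xABCD, for example, splits into imm4 = 0xA, i = 1, imm3 = 0b011, imm8 = 0xCD. The packing in encoding order:

    static class MovImm16Example
    {
        // imm16 = imm4:i:imm3:imm8, as reassembled by OpCodeT32MovImm16.
        public static int Imm16(int imm4, int i, int imm3, int imm8) =>
            (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
    }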
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs b/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs new file mode 100644 index 00000000..36055975 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32ShiftReg.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32ShiftReg : OpCodeT32Alu, IOpCode32AluRsReg + { + public int Rm => Rn; + public int Rs { get; } + + public ShiftType ShiftType { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32ShiftReg(inst, address, opCode); + + public OpCodeT32ShiftReg(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rs = (opCode >> 0) & 0xf; + + ShiftType = (ShiftType)((opCode >> 21) & 3); + } + } +} diff --git a/src/ARMeilleure/Decoders/OpCodeT32Tb.cs b/src/ARMeilleure/Decoders/OpCodeT32Tb.cs new file mode 100644 index 00000000..527754b1 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeT32Tb.cs @@ -0,0 +1,16 @@ +namespace ARMeilleure.Decoders +{ + class OpCodeT32Tb : OpCodeT32, IOpCode32BReg + { + public int Rm { get; } + public int Rn { get; } + + public new static OpCode Create(InstDescriptor inst, ulong address, int opCode) => new OpCodeT32Tb(inst, address, opCode); + + public OpCodeT32Tb(InstDescriptor inst, ulong address, int opCode) : base(inst, address, opCode) + { + Rm = (opCode >> 0) & 0xf; + Rn = (opCode >> 16) & 0xf; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/OpCodeTable.cs b/src/ARMeilleure/Decoders/OpCodeTable.cs new file mode 100644 index 00000000..4f359958 --- /dev/null +++ b/src/ARMeilleure/Decoders/OpCodeTable.cs @@ -0,0 +1,1509 @@ +using ARMeilleure.Instructions; +using System; +using System.Collections.Generic; +using System.Numerics; + +namespace ARMeilleure.Decoders +{ + static class OpCodeTable + { + public delegate OpCode MakeOp(InstDescriptor inst, ulong address, int opCode); + + private const int FastLookupSize = 0x1000; + + private readonly struct InstInfo + { + public int Mask { get; } + public int Value { get; } + + public InstDescriptor Inst { get; } + + public MakeOp MakeOp { get; } + + public InstInfo(int mask, int value, InstDescriptor inst, MakeOp makeOp) + { + Mask = mask; + Value = value; + Inst = inst; + MakeOp = makeOp; + } + } + + private static List<InstInfo> AllInstA32 = new(); + private static List<InstInfo> AllInstT32 = new(); + private static List<InstInfo> AllInstA64 = new(); + + private static InstInfo[][] InstA32FastLookup = new InstInfo[FastLookupSize][]; + private static InstInfo[][] InstT32FastLookup = new InstInfo[FastLookupSize][]; + private static InstInfo[][] InstA64FastLookup = new InstInfo[FastLookupSize][]; + + static OpCodeTable() + { +#region "OpCode Table (AArch64)" + // Base + SetA64("x0011010000xxxxx000000xxxxxxxxxx", InstName.Adc, InstEmit.Adc, OpCodeAluRs.Create); + SetA64("x0111010000xxxxx000000xxxxxxxxxx", InstName.Adcs, InstEmit.Adcs, OpCodeAluRs.Create); + SetA64("x00100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluImm.Create); + SetA64("00001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRs.Create); + SetA64("10001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRs.Create); + SetA64("x0001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRx.Create); + SetA64("x0001011001xxxxxxxx100xxxxxxxxxx", InstName.Add, InstEmit.Add, OpCodeAluRx.Create); + SetA64("x01100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluImm.Create); + SetA64("00101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRs.Create); + SetA64("10101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRs.Create); + SetA64("x0101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRx.Create); + SetA64("x0101011001xxxxxxxx100xxxxxxxxxx", InstName.Adds, InstEmit.Adds, OpCodeAluRx.Create); + SetA64("0xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adr, InstEmit.Adr, OpCodeAdr.Create); + SetA64("1xx10000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Adrp, InstEmit.Adrp, OpCodeAdr.Create); + SetA64("0001001000xxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluImm.Create); + SetA64("100100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluImm.Create); + SetA64("00001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluRs.Create); + SetA64("10001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit.And, OpCodeAluRs.Create); + SetA64("0111001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluImm.Create); + SetA64("111100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluImm.Create); + SetA64("01101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluRs.Create); + SetA64("11101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Ands, InstEmit.Ands, OpCodeAluRs.Create); + SetA64("x0011010110xxxxx001010xxxxxxxxxx", InstName.Asrv, InstEmit.Asrv, OpCodeAluRs.Create); + 
SetA64("000101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit.B, OpCodeBImmAl.Create); + SetA64("01010100xxxxxxxxxxxxxxxxxxx0xxxx", InstName.B_Cond, InstEmit.B_Cond, OpCodeBImmCond.Create); + SetA64("00110011000xxxxx0xxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, OpCodeBfm.Create); + SetA64("1011001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Bfm, InstEmit.Bfm, OpCodeBfm.Create); + SetA64("00001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, OpCodeAluRs.Create); + SetA64("10001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit.Bic, OpCodeAluRs.Create); + SetA64("01101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, OpCodeAluRs.Create); + SetA64("11101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Bics, InstEmit.Bics, OpCodeAluRs.Create); + SetA64("100101xxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit.Bl, OpCodeBImmAl.Create); + SetA64("1101011000111111000000xxxxx00000", InstName.Blr, InstEmit.Blr, OpCodeBReg.Create); + SetA64("1101011000011111000000xxxxx00000", InstName.Br, InstEmit.Br, OpCodeBReg.Create); + SetA64("11010100001xxxxxxxxxxxxxxxx00000", InstName.Brk, InstEmit.Brk, OpCodeException.Create); + SetA64("x0110101xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbnz, InstEmit.Cbnz, OpCodeBImmCmp.Create); + SetA64("x0110100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cbz, InstEmit.Cbz, OpCodeBImmCmp.Create); + SetA64("x0111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, OpCodeCcmpImm.Create); + SetA64("x0111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmn, InstEmit.Ccmn, OpCodeCcmpReg.Create); + SetA64("x1111010010xxxxxxxxx10xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, OpCodeCcmpImm.Create); + SetA64("x1111010010xxxxxxxxx00xxxxx0xxxx", InstName.Ccmp, InstEmit.Ccmp, OpCodeCcmpReg.Create); + SetA64("11010101000000110011xxxx01011111", InstName.Clrex, InstEmit.Clrex, OpCodeSystem.Create); + SetA64("x101101011000000000101xxxxxxxxxx", InstName.Cls, InstEmit.Cls, OpCodeAlu.Create); + SetA64("x101101011000000000100xxxxxxxxxx", InstName.Clz, InstEmit.Clz, OpCodeAlu.Create); + SetA64("00011010110xxxxx010000xxxxxxxxxx", InstName.Crc32b, InstEmit.Crc32b, OpCodeAluBinary.Create); + SetA64("00011010110xxxxx010001xxxxxxxxxx", InstName.Crc32h, InstEmit.Crc32h, OpCodeAluBinary.Create); + SetA64("00011010110xxxxx010010xxxxxxxxxx", InstName.Crc32w, InstEmit.Crc32w, OpCodeAluBinary.Create); + SetA64("10011010110xxxxx010011xxxxxxxxxx", InstName.Crc32x, InstEmit.Crc32x, OpCodeAluBinary.Create); + SetA64("00011010110xxxxx010100xxxxxxxxxx", InstName.Crc32cb, InstEmit.Crc32cb, OpCodeAluBinary.Create); + SetA64("00011010110xxxxx010101xxxxxxxxxx", InstName.Crc32ch, InstEmit.Crc32ch, OpCodeAluBinary.Create); + SetA64("00011010110xxxxx010110xxxxxxxxxx", InstName.Crc32cw, InstEmit.Crc32cw, OpCodeAluBinary.Create); + SetA64("10011010110xxxxx010111xxxxxxxxxx", InstName.Crc32cx, InstEmit.Crc32cx, OpCodeAluBinary.Create); + SetA64("11010101000000110010001010011111", InstName.Csdb, InstEmit.Csdb, OpCodeSystem.Create); + SetA64("x0011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csel, InstEmit.Csel, OpCodeCsel.Create); + SetA64("x0011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csinc, InstEmit.Csinc, OpCodeCsel.Create); + SetA64("x1011010100xxxxxxxxx00xxxxxxxxxx", InstName.Csinv, InstEmit.Csinv, OpCodeCsel.Create); + SetA64("x1011010100xxxxxxxxx01xxxxxxxxxx", InstName.Csneg, InstEmit.Csneg, OpCodeCsel.Create); + SetA64("11010101000000110011xxxx10111111", InstName.Dmb, InstEmit.Dmb, OpCodeSystem.Create); + SetA64("11010101000000110011xxxx10011111", InstName.Dsb, InstEmit.Dsb, OpCodeSystem.Create); + 
SetA64("01001010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, OpCodeAluRs.Create); + SetA64("11001010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Eon, InstEmit.Eon, OpCodeAluRs.Create); + SetA64("0101001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluImm.Create); + SetA64("110100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluImm.Create); + SetA64("01001010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluRs.Create); + SetA64("11001010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit.Eor, OpCodeAluRs.Create); + SetA64("00010011100xxxxx0xxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, OpCodeAluRs.Create); + SetA64("10010011110xxxxxxxxxxxxxxxxxxxxx", InstName.Extr, InstEmit.Extr, OpCodeAluRs.Create); + SetA64("11010101000000110010000011011111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint + SetA64("11010101000000110010000011111111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint + SetA64("110101010000001100100001xxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint + SetA64("1101010100000011001000100xx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint + SetA64("1101010100000011001000101>>11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint + SetA64("110101010000001100100011xxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint + SetA64("11010101000000110010>>xxxxx11111", InstName.Hint, InstEmit.Nop, OpCodeSystem.Create); // Reserved Hint + SetA64("11010101000000110011xxxx11011111", InstName.Isb, InstEmit.Isb, OpCodeSystem.Create); + SetA64("xx001000110xxxxx1xxxxxxxxxxxxxxx", InstName.Ldar, InstEmit.Ldar, OpCodeMemEx.Create); + SetA64("1x001000011xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxp, InstEmit.Ldaxp, OpCodeMemEx.Create); + SetA64("xx001000010xxxxx1xxxxxxxxxxxxxxx", InstName.Ldaxr, InstEmit.Ldaxr, OpCodeMemEx.Create); + SetA64("<<10100xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, OpCodeMemPair.Create); + SetA64("xx111000010xxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemImm.Create); + SetA64("xx11100101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemImm.Create); + SetA64("xx111000011xxxxxxxxx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeMemReg.Create); + SetA64("xx011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, OpCodeMemLit.Create); + SetA64("0x1110001x0xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create); + SetA64("0x1110011xxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create); + SetA64("10111000100xxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create); + SetA64("1011100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemImm.Create); + SetA64("0x1110001x1xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemReg.Create); + SetA64("10111000101xxxxxxxxx10xxxxxxxxxx", InstName.Ldrs, InstEmit.Ldrs, OpCodeMemReg.Create); + SetA64("xx001000010xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxr, InstEmit.Ldxr, OpCodeMemEx.Create); + SetA64("1x001000011xxxxx0xxxxxxxxxxxxxxx", InstName.Ldxp, InstEmit.Ldxp, OpCodeMemEx.Create); + SetA64("x0011010110xxxxx001000xxxxxxxxxx", InstName.Lslv, InstEmit.Lslv, OpCodeAluRs.Create); + SetA64("x0011010110xxxxx001001xxxxxxxxxx", InstName.Lsrv, InstEmit.Lsrv, OpCodeAluRs.Create); + SetA64("x0011011000xxxxx0xxxxxxxxxxxxxxx", InstName.Madd, InstEmit.Madd, OpCodeMul.Create); + SetA64("0111001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, 
InstEmit.Movk, OpCodeMov.Create); + SetA64("111100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movk, InstEmit.Movk, OpCodeMov.Create); + SetA64("0001001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, OpCodeMov.Create); + SetA64("100100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movn, InstEmit.Movn, OpCodeMov.Create); + SetA64("0101001010xxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, OpCodeMov.Create); + SetA64("110100101xxxxxxxxxxxxxxxxxxxxxxx", InstName.Movz, InstEmit.Movz, OpCodeMov.Create); + SetA64("110101010011xxxxxxxxxxxxxxxxxxxx", InstName.Mrs, InstEmit.Mrs, OpCodeSystem.Create); + SetA64("110101010001xxxxxxxxxxxxxxxxxxxx", InstName.Msr, InstEmit.Msr, OpCodeSystem.Create); + SetA64("x0011011000xxxxx1xxxxxxxxxxxxxxx", InstName.Msub, InstEmit.Msub, OpCodeMul.Create); + SetA64("11010101000000110010000000011111", InstName.Nop, InstEmit.Nop, OpCodeSystem.Create); + SetA64("00101010xx1xxxxx0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, OpCodeAluRs.Create); + SetA64("10101010xx1xxxxxxxxxxxxxxxxxxxxx", InstName.Orn, InstEmit.Orn, OpCodeAluRs.Create); + SetA64("0011001000xxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluImm.Create); + SetA64("101100100xxxxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluImm.Create); + SetA64("00101010xx0xxxxx0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluRs.Create); + SetA64("10101010xx0xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit.Orr, OpCodeAluRs.Create); + SetA64("1111100110xxxxxxxxxxxxxxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemImm.Create); // immediate + SetA64("11111000100xxxxxxxxx00xxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemImm.Create); // prfum (unscaled offset) + SetA64("11011000xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemLit.Create); // literal + SetA64("11111000101xxxxxxxxx10xxxxxxxxxx", InstName.Prfm, InstEmit.Prfm, OpCodeMemReg.Create); // register + SetA64("x101101011000000000000xxxxxxxxxx", InstName.Rbit, InstEmit.Rbit, OpCodeAlu.Create); + SetA64("1101011001011111000000xxxxx00000", InstName.Ret, InstEmit.Ret, OpCodeBReg.Create); + SetA64("x101101011000000000001xxxxxxxxxx", InstName.Rev16, InstEmit.Rev16, OpCodeAlu.Create); + SetA64("x101101011000000000010xxxxxxxxxx", InstName.Rev32, InstEmit.Rev32, OpCodeAlu.Create); + SetA64("1101101011000000000011xxxxxxxxxx", InstName.Rev64, InstEmit.Rev64, OpCodeAlu.Create); + SetA64("x0011010110xxxxx001011xxxxxxxxxx", InstName.Rorv, InstEmit.Rorv, OpCodeAluRs.Create); + SetA64("x1011010000xxxxx000000xxxxxxxxxx", InstName.Sbc, InstEmit.Sbc, OpCodeAluRs.Create); + SetA64("x1111010000xxxxx000000xxxxxxxxxx", InstName.Sbcs, InstEmit.Sbcs, OpCodeAluRs.Create); + SetA64("00010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, OpCodeBfm.Create); + SetA64("1001001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Sbfm, InstEmit.Sbfm, OpCodeBfm.Create); + SetA64("x0011010110xxxxx000011xxxxxxxxxx", InstName.Sdiv, InstEmit.Sdiv, OpCodeAluBinary.Create); + SetA64("11010101000000110010000010011111", InstName.Sev, InstEmit.Nop, OpCodeSystem.Create); + SetA64("11010101000000110010000010111111", InstName.Sevl, InstEmit.Nop, OpCodeSystem.Create); + SetA64("10011011001xxxxx0xxxxxxxxxxxxxxx", InstName.Smaddl, InstEmit.Smaddl, OpCodeMul.Create); + SetA64("10011011001xxxxx1xxxxxxxxxxxxxxx", InstName.Smsubl, InstEmit.Smsubl, OpCodeMul.Create); + SetA64("10011011010xxxxx0xxxxxxxxxxxxxxx", InstName.Smulh, InstEmit.Smulh, OpCodeMul.Create); + SetA64("xx001000100xxxxx1xxxxxxxxxxxxxxx", InstName.Stlr, InstEmit.Stlr, OpCodeMemEx.Create); + 
SetA64("1x001000001xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxp, InstEmit.Stlxp, OpCodeMemEx.Create); + SetA64("xx001000000xxxxx1xxxxxxxxxxxxxxx", InstName.Stlxr, InstEmit.Stlxr, OpCodeMemEx.Create); + SetA64("x010100xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, OpCodeMemPair.Create); + SetA64("xx111000000xxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemImm.Create); + SetA64("xx11100100xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemImm.Create); + SetA64("xx111000001xxxxxxxxx10xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeMemReg.Create); + SetA64("1x001000001xxxxx0xxxxxxxxxxxxxxx", InstName.Stxp, InstEmit.Stxp, OpCodeMemEx.Create); + SetA64("xx001000000xxxxx0xxxxxxxxxxxxxxx", InstName.Stxr, InstEmit.Stxr, OpCodeMemEx.Create); + SetA64("x10100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluImm.Create); + SetA64("01001011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRs.Create); + SetA64("11001011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRs.Create); + SetA64("x1001011001xxxxxxxx0xxxxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRx.Create); + SetA64("x1001011001xxxxxxxx100xxxxxxxxxx", InstName.Sub, InstEmit.Sub, OpCodeAluRx.Create); + SetA64("x11100010xxxxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluImm.Create); + SetA64("01101011<<0xxxxx0xxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRs.Create); + SetA64("11101011<<0xxxxxxxxxxxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRs.Create); + SetA64("x1101011001xxxxxxxx0xxxxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRx.Create); + SetA64("x1101011001xxxxxxxx100xxxxxxxxxx", InstName.Subs, InstEmit.Subs, OpCodeAluRx.Create); + SetA64("11010100000xxxxxxxxxxxxxxxx00001", InstName.Svc, InstEmit.Svc, OpCodeException.Create); + SetA64("1101010100001xxxxxxxxxxxxxxxxxxx", InstName.Sys, InstEmit.Sys, OpCodeSystem.Create); + SetA64("x0110111xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbnz, InstEmit.Tbnz, OpCodeBImmTest.Create); + SetA64("x0110110xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tbz, InstEmit.Tbz, OpCodeBImmTest.Create); + SetA64("01010011000xxxxx0xxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, OpCodeBfm.Create); + SetA64("1101001101xxxxxxxxxxxxxxxxxxxxxx", InstName.Ubfm, InstEmit.Ubfm, OpCodeBfm.Create); + SetA64("x0011010110xxxxx000010xxxxxxxxxx", InstName.Udiv, InstEmit.Udiv, OpCodeAluBinary.Create); + SetA64("10011011101xxxxx0xxxxxxxxxxxxxxx", InstName.Umaddl, InstEmit.Umaddl, OpCodeMul.Create); + SetA64("10011011101xxxxx1xxxxxxxxxxxxxxx", InstName.Umsubl, InstEmit.Umsubl, OpCodeMul.Create); + SetA64("10011011110xxxxx0xxxxxxxxxxxxxxx", InstName.Umulh, InstEmit.Umulh, OpCodeMul.Create); + SetA64("11010101000000110010000001011111", InstName.Wfe, InstEmit.Nop, OpCodeSystem.Create); + SetA64("11010101000000110010000001111111", InstName.Wfi, InstEmit.Nop, OpCodeSystem.Create); + SetA64("11010101000000110010000000111111", InstName.Yield, InstEmit.Nop, OpCodeSystem.Create); + + // FP & SIMD + SetA64("0101111011100000101110xxxxxxxxxx", InstName.Abs_S, InstEmit.Abs_S, OpCodeSimd.Create); + SetA64("0>001110<<100000101110xxxxxxxxxx", InstName.Abs_V, InstEmit.Abs_V, OpCodeSimd.Create); + SetA64("01011110111xxxxx100001xxxxxxxxxx", InstName.Add_S, InstEmit.Add_S, OpCodeSimdReg.Create); + SetA64("0>001110<<1xxxxx100001xxxxxxxxxx", InstName.Add_V, InstEmit.Add_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx010000xxxxxxxxxx", InstName.Addhn_V, InstEmit.Addhn_V, OpCodeSimdReg.Create); + SetA64("0101111011110001101110xxxxxxxxxx", InstName.Addp_S, 
InstEmit.Addp_S, OpCodeSimd.Create); + SetA64("0>001110<<1xxxxx101111xxxxxxxxxx", InstName.Addp_V, InstEmit.Addp_V, OpCodeSimdReg.Create); + SetA64("000011100x110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, OpCodeSimd.Create); + SetA64("01001110<<110001101110xxxxxxxxxx", InstName.Addv_V, InstEmit.Addv_V, OpCodeSimd.Create); + SetA64("0100111000101000010110xxxxxxxxxx", InstName.Aesd_V, InstEmit.Aesd_V, OpCodeSimd.Create); + SetA64("0100111000101000010010xxxxxxxxxx", InstName.Aese_V, InstEmit.Aese_V, OpCodeSimd.Create); + SetA64("0100111000101000011110xxxxxxxxxx", InstName.Aesimc_V, InstEmit.Aesimc_V, OpCodeSimd.Create); + SetA64("0100111000101000011010xxxxxxxxxx", InstName.Aesmc_V, InstEmit.Aesmc_V, OpCodeSimd.Create); + SetA64("0x001110001xxxxx000111xxxxxxxxxx", InstName.And_V, InstEmit.And_V, OpCodeSimdReg.Create); + SetA64("0x001110011xxxxx000111xxxxxxxxxx", InstName.Bic_V, InstEmit.Bic_V, OpCodeSimdReg.Create); + SetA64("0x10111100000xxx0xx101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, OpCodeSimdImm.Create); + SetA64("0x10111100000xxx10x101xxxxxxxxxx", InstName.Bic_Vi, InstEmit.Bic_Vi, OpCodeSimdImm.Create); + SetA64("0x101110111xxxxx000111xxxxxxxxxx", InstName.Bif_V, InstEmit.Bif_V, OpCodeSimdReg.Create); + SetA64("0x101110101xxxxx000111xxxxxxxxxx", InstName.Bit_V, InstEmit.Bit_V, OpCodeSimdReg.Create); + SetA64("0x101110011xxxxx000111xxxxxxxxxx", InstName.Bsl_V, InstEmit.Bsl_V, OpCodeSimdReg.Create); + SetA64("0x001110<<100000010010xxxxxxxxxx", InstName.Cls_V, InstEmit.Cls_V, OpCodeSimd.Create); + SetA64("0x101110<<100000010010xxxxxxxxxx", InstName.Clz_V, InstEmit.Clz_V, OpCodeSimd.Create); + SetA64("01111110111xxxxx100011xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, OpCodeSimdReg.Create); + SetA64("0101111011100000100110xxxxxxxxxx", InstName.Cmeq_S, InstEmit.Cmeq_S, OpCodeSimd.Create); + SetA64("0>101110<<1xxxxx100011xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, OpCodeSimdReg.Create); + SetA64("0>001110<<100000100110xxxxxxxxxx", InstName.Cmeq_V, InstEmit.Cmeq_V, OpCodeSimd.Create); + SetA64("01011110111xxxxx001111xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, OpCodeSimdReg.Create); + SetA64("0111111011100000100010xxxxxxxxxx", InstName.Cmge_S, InstEmit.Cmge_S, OpCodeSimd.Create); + SetA64("0>001110<<1xxxxx001111xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, OpCodeSimdReg.Create); + SetA64("0>101110<<100000100010xxxxxxxxxx", InstName.Cmge_V, InstEmit.Cmge_V, OpCodeSimd.Create); + SetA64("01011110111xxxxx001101xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, OpCodeSimdReg.Create); + SetA64("0101111011100000100010xxxxxxxxxx", InstName.Cmgt_S, InstEmit.Cmgt_S, OpCodeSimd.Create); + SetA64("0>001110<<1xxxxx001101xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, OpCodeSimdReg.Create); + SetA64("0>001110<<100000100010xxxxxxxxxx", InstName.Cmgt_V, InstEmit.Cmgt_V, OpCodeSimd.Create); + SetA64("01111110111xxxxx001101xxxxxxxxxx", InstName.Cmhi_S, InstEmit.Cmhi_S, OpCodeSimdReg.Create); + SetA64("0>101110<<1xxxxx001101xxxxxxxxxx", InstName.Cmhi_V, InstEmit.Cmhi_V, OpCodeSimdReg.Create); + SetA64("01111110111xxxxx001111xxxxxxxxxx", InstName.Cmhs_S, InstEmit.Cmhs_S, OpCodeSimdReg.Create); + SetA64("0>101110<<1xxxxx001111xxxxxxxxxx", InstName.Cmhs_V, InstEmit.Cmhs_V, OpCodeSimdReg.Create); + SetA64("0111111011100000100110xxxxxxxxxx", InstName.Cmle_S, InstEmit.Cmle_S, OpCodeSimd.Create); + SetA64("0>101110<<100000100110xxxxxxxxxx", InstName.Cmle_V, InstEmit.Cmle_V, OpCodeSimd.Create); + SetA64("0101111011100000101010xxxxxxxxxx", InstName.Cmlt_S, InstEmit.Cmlt_S, OpCodeSimd.Create); + 
SetA64("0>001110<<100000101010xxxxxxxxxx", InstName.Cmlt_V, InstEmit.Cmlt_V, OpCodeSimd.Create); + SetA64("01011110111xxxxx100011xxxxxxxxxx", InstName.Cmtst_S, InstEmit.Cmtst_S, OpCodeSimdReg.Create); + SetA64("0>001110<<1xxxxx100011xxxxxxxxxx", InstName.Cmtst_V, InstEmit.Cmtst_V, OpCodeSimdReg.Create); + SetA64("0x00111000100000010110xxxxxxxxxx", InstName.Cnt_V, InstEmit.Cnt_V, OpCodeSimd.Create); + SetA64("0>001110000x<>>>000011xxxxxxxxxx", InstName.Dup_Gp, InstEmit.Dup_Gp, OpCodeSimdIns.Create); + SetA64("01011110000xxxxx000001xxxxxxxxxx", InstName.Dup_S, InstEmit.Dup_S, OpCodeSimdIns.Create); + SetA64("0>001110000x<>>>000001xxxxxxxxxx", InstName.Dup_V, InstEmit.Dup_V, OpCodeSimdIns.Create); + SetA64("0x101110001xxxxx000111xxxxxxxxxx", InstName.Eor_V, InstEmit.Eor_V, OpCodeSimdReg.Create); + SetA64("0>101110000xxxxx0<xxx0xxxxxxxxxx", InstName.Ext_V, InstEmit.Ext_V, OpCodeSimdExt.Create); + SetA64("011111101x1xxxxx110101xxxxxxxxxx", InstName.Fabd_S, InstEmit.Fabd_S, OpCodeSimdReg.Create); + SetA64("0>1011101<1xxxxx110101xxxxxxxxxx", InstName.Fabd_V, InstEmit.Fabd_V, OpCodeSimdReg.Create); + SetA64("000111100x100000110000xxxxxxxxxx", InstName.Fabs_S, InstEmit.Fabs_S, OpCodeSimd.Create); + SetA64("0>0011101<100000111110xxxxxxxxxx", InstName.Fabs_V, InstEmit.Fabs_V, OpCodeSimd.Create); + SetA64("011111100x1xxxxx111011xxxxxxxxxx", InstName.Facge_S, InstEmit.Facge_S, OpCodeSimdReg.Create); + SetA64("0>1011100<1xxxxx111011xxxxxxxxxx", InstName.Facge_V, InstEmit.Facge_V, OpCodeSimdReg.Create); + SetA64("011111101x1xxxxx111011xxxxxxxxxx", InstName.Facgt_S, InstEmit.Facgt_S, OpCodeSimdReg.Create); + SetA64("0>1011101<1xxxxx111011xxxxxxxxxx", InstName.Facgt_V, InstEmit.Facgt_V, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxx001010xxxxxxxxxx", InstName.Fadd_S, InstEmit.Fadd_S, OpCodeSimdReg.Create); + SetA64("0>0011100<1xxxxx110101xxxxxxxxxx", InstName.Fadd_V, InstEmit.Fadd_V, OpCodeSimdReg.Create); + SetA64("011111100x110000110110xxxxxxxxxx", InstName.Faddp_S, InstEmit.Faddp_S, OpCodeSimd.Create); + SetA64("0>1011100<1xxxxx110101xxxxxxxxxx", InstName.Faddp_V, InstEmit.Faddp_V, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxxxxxx01xxxxx0xxxx", InstName.Fccmp_S, InstEmit.Fccmp_S, OpCodeSimdFcond.Create); + SetA64("000111100x1xxxxxxxxx01xxxxx1xxxx", InstName.Fccmpe_S, InstEmit.Fccmpe_S, OpCodeSimdFcond.Create); + SetA64("010111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, OpCodeSimdReg.Create); + SetA64("010111101x100000110110xxxxxxxxxx", InstName.Fcmeq_S, InstEmit.Fcmeq_S, OpCodeSimd.Create); + SetA64("0>0011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, OpCodeSimdReg.Create); + SetA64("0>0011101<100000110110xxxxxxxxxx", InstName.Fcmeq_V, InstEmit.Fcmeq_V, OpCodeSimd.Create); + SetA64("011111100x1xxxxx111001xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, OpCodeSimdReg.Create); + SetA64("011111101x100000110010xxxxxxxxxx", InstName.Fcmge_S, InstEmit.Fcmge_S, OpCodeSimd.Create); + SetA64("0>1011100<1xxxxx111001xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, OpCodeSimdReg.Create); + SetA64("0>1011101<100000110010xxxxxxxxxx", InstName.Fcmge_V, InstEmit.Fcmge_V, OpCodeSimd.Create); + SetA64("011111101x1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, OpCodeSimdReg.Create); + SetA64("010111101x100000110010xxxxxxxxxx", InstName.Fcmgt_S, InstEmit.Fcmgt_S, OpCodeSimd.Create); + SetA64("0>1011101<1xxxxx111001xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, OpCodeSimdReg.Create); + SetA64("0>0011101<100000110010xxxxxxxxxx", InstName.Fcmgt_V, InstEmit.Fcmgt_V, 
OpCodeSimd.Create); + SetA64("011111101x100000110110xxxxxxxxxx", InstName.Fcmle_S, InstEmit.Fcmle_S, OpCodeSimd.Create); + SetA64("0>1011101<100000110110xxxxxxxxxx", InstName.Fcmle_V, InstEmit.Fcmle_V, OpCodeSimd.Create); + SetA64("010111101x100000111010xxxxxxxxxx", InstName.Fcmlt_S, InstEmit.Fcmlt_S, OpCodeSimd.Create); + SetA64("0>0011101<100000111010xxxxxxxxxx", InstName.Fcmlt_V, InstEmit.Fcmlt_V, OpCodeSimd.Create); + SetA64("000111100x1xxxxx001000xxxxx0x000", InstName.Fcmp_S, InstEmit.Fcmp_S, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxx001000xxxxx1x000", InstName.Fcmpe_S, InstEmit.Fcmpe_S, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxxxxxx11xxxxxxxxxx", InstName.Fcsel_S, InstEmit.Fcsel_S, OpCodeSimdFcond.Create); + SetA64("00011110xx10001xx10000xxxxxxxxxx", InstName.Fcvt_S, InstEmit.Fcvt_S, OpCodeSimd.Create); + SetA64("x00111100x100100000000xxxxxxxxxx", InstName.Fcvtas_Gp, InstEmit.Fcvtas_Gp, OpCodeSimdCvt.Create); + SetA64("010111100x100001110010xxxxxxxxxx", InstName.Fcvtas_S, InstEmit.Fcvtas_S, OpCodeSimd.Create); + SetA64("0>0011100<100001110010xxxxxxxxxx", InstName.Fcvtas_V, InstEmit.Fcvtas_V, OpCodeSimd.Create); + SetA64("x00111100x100101000000xxxxxxxxxx", InstName.Fcvtau_Gp, InstEmit.Fcvtau_Gp, OpCodeSimdCvt.Create); + SetA64("011111100x100001110010xxxxxxxxxx", InstName.Fcvtau_S, InstEmit.Fcvtau_S, OpCodeSimd.Create); + SetA64("0>1011100<100001110010xxxxxxxxxx", InstName.Fcvtau_V, InstEmit.Fcvtau_V, OpCodeSimd.Create); + SetA64("0x0011100x100001011110xxxxxxxxxx", InstName.Fcvtl_V, InstEmit.Fcvtl_V, OpCodeSimd.Create); + SetA64("x00111100x110000000000xxxxxxxxxx", InstName.Fcvtms_Gp, InstEmit.Fcvtms_Gp, OpCodeSimdCvt.Create); + SetA64("0>0011100<100001101110xxxxxxxxxx", InstName.Fcvtms_V, InstEmit.Fcvtms_V, OpCodeSimd.Create); + SetA64("x00111100x110001000000xxxxxxxxxx", InstName.Fcvtmu_Gp, InstEmit.Fcvtmu_Gp, OpCodeSimdCvt.Create); + SetA64("0x0011100x100001011010xxxxxxxxxx", InstName.Fcvtn_V, InstEmit.Fcvtn_V, OpCodeSimd.Create); + SetA64("x00111100x100000000000xxxxxxxxxx", InstName.Fcvtns_Gp, InstEmit.Fcvtns_Gp, OpCodeSimdCvt.Create); + SetA64("010111100x100001101010xxxxxxxxxx", InstName.Fcvtns_S, InstEmit.Fcvtns_S, OpCodeSimd.Create); + SetA64("0>0011100<100001101010xxxxxxxxxx", InstName.Fcvtns_V, InstEmit.Fcvtns_V, OpCodeSimd.Create); + SetA64("011111100x100001101010xxxxxxxxxx", InstName.Fcvtnu_S, InstEmit.Fcvtnu_S, OpCodeSimd.Create); + SetA64("0>1011100<100001101010xxxxxxxxxx", InstName.Fcvtnu_V, InstEmit.Fcvtnu_V, OpCodeSimd.Create); + SetA64("x00111100x101000000000xxxxxxxxxx", InstName.Fcvtps_Gp, InstEmit.Fcvtps_Gp, OpCodeSimdCvt.Create); + SetA64("x00111100x101001000000xxxxxxxxxx", InstName.Fcvtpu_Gp, InstEmit.Fcvtpu_Gp, OpCodeSimdCvt.Create); + SetA64("x00111100x111000000000xxxxxxxxxx", InstName.Fcvtzs_Gp, InstEmit.Fcvtzs_Gp, OpCodeSimdCvt.Create); + SetA64(">00111100x011000>xxxxxxxxxxxxxxx", InstName.Fcvtzs_Gp_Fixed, InstEmit.Fcvtzs_Gp_Fixed, OpCodeSimdCvt.Create); + SetA64("010111101x100001101110xxxxxxxxxx", InstName.Fcvtzs_S, InstEmit.Fcvtzs_S, OpCodeSimd.Create); + SetA64("0>0011101<100001101110xxxxxxxxxx", InstName.Fcvtzs_V, InstEmit.Fcvtzs_V, OpCodeSimd.Create); + SetA64("0x001111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, OpCodeSimdShImm.Create); + SetA64("0100111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzs_V_Fixed, InstEmit.Fcvtzs_V_Fixed, OpCodeSimdShImm.Create); + SetA64("x00111100x111001000000xxxxxxxxxx", InstName.Fcvtzu_Gp, InstEmit.Fcvtzu_Gp, OpCodeSimdCvt.Create); + SetA64(">00111100x011001>xxxxxxxxxxxxxxx", 
InstName.Fcvtzu_Gp_Fixed, InstEmit.Fcvtzu_Gp_Fixed, OpCodeSimdCvt.Create); + SetA64("011111101x100001101110xxxxxxxxxx", InstName.Fcvtzu_S, InstEmit.Fcvtzu_S, OpCodeSimd.Create); + SetA64("0>1011101<100001101110xxxxxxxxxx", InstName.Fcvtzu_V, InstEmit.Fcvtzu_V, OpCodeSimd.Create); + SetA64("0x101111001xxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, OpCodeSimdShImm.Create); + SetA64("0110111101xxxxxx111111xxxxxxxxxx", InstName.Fcvtzu_V_Fixed, InstEmit.Fcvtzu_V_Fixed, OpCodeSimdShImm.Create); + SetA64("000111100x1xxxxx000110xxxxxxxxxx", InstName.Fdiv_S, InstEmit.Fdiv_S, OpCodeSimdReg.Create); + SetA64("0>1011100<1xxxxx111111xxxxxxxxxx", InstName.Fdiv_V, InstEmit.Fdiv_V, OpCodeSimdReg.Create); + SetA64("000111110x0xxxxx0xxxxxxxxxxxxxxx", InstName.Fmadd_S, InstEmit.Fmadd_S, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxx010010xxxxxxxxxx", InstName.Fmax_S, InstEmit.Fmax_S, OpCodeSimdReg.Create); + SetA64("0>0011100<1xxxxx111101xxxxxxxxxx", InstName.Fmax_V, InstEmit.Fmax_V, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxx011010xxxxxxxxxx", InstName.Fmaxnm_S, InstEmit.Fmaxnm_S, OpCodeSimdReg.Create); + SetA64("0>0011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnm_V, InstEmit.Fmaxnm_V, OpCodeSimdReg.Create); + SetA64("011111100x110000110010xxxxxxxxxx", InstName.Fmaxnmp_S, InstEmit.Fmaxnmp_S, OpCodeSimd.Create); + SetA64("0>1011100<1xxxxx110001xxxxxxxxxx", InstName.Fmaxnmp_V, InstEmit.Fmaxnmp_V, OpCodeSimdReg.Create); + SetA64("0110111000110000110010xxxxxxxxxx", InstName.Fmaxnmv_V, InstEmit.Fmaxnmv_V, OpCodeSimd.Create); + SetA64("0>1011100<1xxxxx111101xxxxxxxxxx", InstName.Fmaxp_V, InstEmit.Fmaxp_V, OpCodeSimdReg.Create); + SetA64("0110111000110000111110xxxxxxxxxx", InstName.Fmaxv_V, InstEmit.Fmaxv_V, OpCodeSimd.Create); + SetA64("000111100x1xxxxx010110xxxxxxxxxx", InstName.Fmin_S, InstEmit.Fmin_S, OpCodeSimdReg.Create); + SetA64("0>0011101<1xxxxx111101xxxxxxxxxx", InstName.Fmin_V, InstEmit.Fmin_V, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxx011110xxxxxxxxxx", InstName.Fminnm_S, InstEmit.Fminnm_S, OpCodeSimdReg.Create); + SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnm_V, InstEmit.Fminnm_V, OpCodeSimdReg.Create); + SetA64("011111101x110000110010xxxxxxxxxx", InstName.Fminnmp_S, InstEmit.Fminnmp_S, OpCodeSimd.Create); + SetA64("0>1011101<1xxxxx110001xxxxxxxxxx", InstName.Fminnmp_V, InstEmit.Fminnmp_V, OpCodeSimdReg.Create); + SetA64("0110111010110000110010xxxxxxxxxx", InstName.Fminnmv_V, InstEmit.Fminnmv_V, OpCodeSimd.Create); + SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", InstName.Fminp_V, InstEmit.Fminp_V, OpCodeSimdReg.Create); + SetA64("0110111010110000111110xxxxxxxxxx", InstName.Fminv_V, InstEmit.Fminv_V, OpCodeSimd.Create); + SetA64("010111111xxxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Se, InstEmit.Fmla_Se, OpCodeSimdRegElemF.Create); + SetA64("0>0011100<1xxxxx110011xxxxxxxxxx", InstName.Fmla_V, InstEmit.Fmla_V, OpCodeSimdReg.Create); + SetA64("0>0011111<xxxxxx0001x0xxxxxxxxxx", InstName.Fmla_Ve, InstEmit.Fmla_Ve, OpCodeSimdRegElemF.Create); + SetA64("010111111xxxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Se, InstEmit.Fmls_Se, OpCodeSimdRegElemF.Create); + SetA64("0>0011101<1xxxxx110011xxxxxxxxxx", InstName.Fmls_V, InstEmit.Fmls_V, OpCodeSimdReg.Create); + SetA64("0>0011111<xxxxxx0101x0xxxxxxxxxx", InstName.Fmls_Ve, InstEmit.Fmls_Ve, OpCodeSimdRegElemF.Create); + SetA64("000111100x100000010000xxxxxxxxxx", InstName.Fmov_S, InstEmit.Fmov_S, OpCodeSimd.Create); + SetA64("000111100x1xxxxxxxx10000000xxxxx", InstName.Fmov_Si, InstEmit.Fmov_Si, OpCodeSimdFmov.Create); + 
SetA64("0x00111100000xxx111101xxxxxxxxxx", InstName.Fmov_Vi, InstEmit.Fmov_Vi, OpCodeSimdImm.Create); + SetA64("0110111100000xxx111101xxxxxxxxxx", InstName.Fmov_Vi, InstEmit.Fmov_Vi, OpCodeSimdImm.Create); + SetA64("0001111000100110000000xxxxxxxxxx", InstName.Fmov_Ftoi, InstEmit.Fmov_Ftoi, OpCodeSimd.Create); + SetA64("1001111001100110000000xxxxxxxxxx", InstName.Fmov_Ftoi, InstEmit.Fmov_Ftoi, OpCodeSimd.Create); + SetA64("0001111000100111000000xxxxxxxxxx", InstName.Fmov_Itof, InstEmit.Fmov_Itof, OpCodeSimd.Create); + SetA64("1001111001100111000000xxxxxxxxxx", InstName.Fmov_Itof, InstEmit.Fmov_Itof, OpCodeSimd.Create); + SetA64("1001111010101110000000xxxxxxxxxx", InstName.Fmov_Ftoi1, InstEmit.Fmov_Ftoi1, OpCodeSimd.Create); + SetA64("1001111010101111000000xxxxxxxxxx", InstName.Fmov_Itof1, InstEmit.Fmov_Itof1, OpCodeSimd.Create); + SetA64("000111110x0xxxxx1xxxxxxxxxxxxxxx", InstName.Fmsub_S, InstEmit.Fmsub_S, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxx000010xxxxxxxxxx", InstName.Fmul_S, InstEmit.Fmul_S, OpCodeSimdReg.Create); + SetA64("010111111xxxxxxx1001x0xxxxxxxxxx", InstName.Fmul_Se, InstEmit.Fmul_Se, OpCodeSimdRegElemF.Create); + SetA64("0>1011100<1xxxxx110111xxxxxxxxxx", InstName.Fmul_V, InstEmit.Fmul_V, OpCodeSimdReg.Create); + SetA64("0>0011111<xxxxxx1001x0xxxxxxxxxx", InstName.Fmul_Ve, InstEmit.Fmul_Ve, OpCodeSimdRegElemF.Create); + SetA64("010111100x1xxxxx110111xxxxxxxxxx", InstName.Fmulx_S, InstEmit.Fmulx_S, OpCodeSimdReg.Create); + SetA64("011111111xxxxxxx1001x0xxxxxxxxxx", InstName.Fmulx_Se, InstEmit.Fmulx_Se, OpCodeSimdRegElemF.Create); + SetA64("0>0011100<1xxxxx110111xxxxxxxxxx", InstName.Fmulx_V, InstEmit.Fmulx_V, OpCodeSimdReg.Create); + SetA64("0>1011111<xxxxxx1001x0xxxxxxxxxx", InstName.Fmulx_Ve, InstEmit.Fmulx_Ve, OpCodeSimdRegElemF.Create); + SetA64("000111100x100001010000xxxxxxxxxx", InstName.Fneg_S, InstEmit.Fneg_S, OpCodeSimd.Create); + SetA64("0>1011101<100000111110xxxxxxxxxx", InstName.Fneg_V, InstEmit.Fneg_V, OpCodeSimd.Create); + SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", InstName.Fnmadd_S, InstEmit.Fnmadd_S, OpCodeSimdReg.Create); + SetA64("000111110x1xxxxx1xxxxxxxxxxxxxxx", InstName.Fnmsub_S, InstEmit.Fnmsub_S, OpCodeSimdReg.Create); + SetA64("000111100x1xxxxx100010xxxxxxxxxx", InstName.Fnmul_S, InstEmit.Fnmul_S, OpCodeSimdReg.Create); + SetA64("010111101x100001110110xxxxxxxxxx", InstName.Frecpe_S, InstEmit.Frecpe_S, OpCodeSimd.Create); + SetA64("0>0011101<100001110110xxxxxxxxxx", InstName.Frecpe_V, InstEmit.Frecpe_V, OpCodeSimd.Create); + SetA64("010111100x1xxxxx111111xxxxxxxxxx", InstName.Frecps_S, InstEmit.Frecps_S, OpCodeSimdReg.Create); + SetA64("0>0011100<1xxxxx111111xxxxxxxxxx", InstName.Frecps_V, InstEmit.Frecps_V, OpCodeSimdReg.Create); + SetA64("010111101x100001111110xxxxxxxxxx", InstName.Frecpx_S, InstEmit.Frecpx_S, OpCodeSimd.Create); + SetA64("000111100x100110010000xxxxxxxxxx", InstName.Frinta_S, InstEmit.Frinta_S, OpCodeSimd.Create); + SetA64("0>1011100<100001100010xxxxxxxxxx", InstName.Frinta_V, InstEmit.Frinta_V, OpCodeSimd.Create); + SetA64("000111100x100111110000xxxxxxxxxx", InstName.Frinti_S, InstEmit.Frinti_S, OpCodeSimd.Create); + SetA64("0>1011101<100001100110xxxxxxxxxx", InstName.Frinti_V, InstEmit.Frinti_V, OpCodeSimd.Create); + SetA64("000111100x100101010000xxxxxxxxxx", InstName.Frintm_S, InstEmit.Frintm_S, OpCodeSimd.Create); + SetA64("0>0011100<100001100110xxxxxxxxxx", InstName.Frintm_V, InstEmit.Frintm_V, OpCodeSimd.Create); + SetA64("000111100x100100010000xxxxxxxxxx", InstName.Frintn_S, InstEmit.Frintn_S, OpCodeSimd.Create); + 
SetA64("0>0011100<100001100010xxxxxxxxxx", InstName.Frintn_V, InstEmit.Frintn_V, OpCodeSimd.Create); + SetA64("000111100x100100110000xxxxxxxxxx", InstName.Frintp_S, InstEmit.Frintp_S, OpCodeSimd.Create); + SetA64("0>0011101<100001100010xxxxxxxxxx", InstName.Frintp_V, InstEmit.Frintp_V, OpCodeSimd.Create); + SetA64("000111100x100111010000xxxxxxxxxx", InstName.Frintx_S, InstEmit.Frintx_S, OpCodeSimd.Create); + SetA64("0>1011100<100001100110xxxxxxxxxx", InstName.Frintx_V, InstEmit.Frintx_V, OpCodeSimd.Create); + SetA64("000111100x100101110000xxxxxxxxxx", InstName.Frintz_S, InstEmit.Frintz_S, OpCodeSimd.Create); + SetA64("0>0011101<100001100110xxxxxxxxxx", InstName.Frintz_V, InstEmit.Frintz_V, OpCodeSimd.Create); + SetA64("011111101x100001110110xxxxxxxxxx", InstName.Frsqrte_S, InstEmit.Frsqrte_S, OpCodeSimd.Create); + SetA64("0>1011101<100001110110xxxxxxxxxx", InstName.Frsqrte_V, InstEmit.Frsqrte_V, OpCodeSimd.Create); + SetA64("010111101x1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_S, InstEmit.Frsqrts_S, OpCodeSimdReg.Create); + SetA64("0>0011101<1xxxxx111111xxxxxxxxxx", InstName.Frsqrts_V, InstEmit.Frsqrts_V, OpCodeSimdReg.Create); + SetA64("000111100x100001110000xxxxxxxxxx", InstName.Fsqrt_S, InstEmit.Fsqrt_S, OpCodeSimd.Create); + SetA64("0>1011101<100001111110xxxxxxxxxx", InstName.Fsqrt_V, InstEmit.Fsqrt_V, OpCodeSimd.Create); + SetA64("000111100x1xxxxx001110xxxxxxxxxx", InstName.Fsub_S, InstEmit.Fsub_S, OpCodeSimdReg.Create); + SetA64("0>0011101<1xxxxx110101xxxxxxxxxx", InstName.Fsub_V, InstEmit.Fsub_V, OpCodeSimdReg.Create); + SetA64("01001110000xxxxx000111xxxxxxxxxx", InstName.Ins_Gp, InstEmit.Ins_Gp, OpCodeSimdIns.Create); + SetA64("01101110000xxxxx0xxxx1xxxxxxxxxx", InstName.Ins_V, InstEmit.Ins_V, OpCodeSimdIns.Create); + SetA64("0x00110001000000xxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, OpCodeSimdMemMs.Create); + SetA64("0x001100110xxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vms, InstEmit.Ld__Vms, OpCodeSimdMemMs.Create); + SetA64("0x00110101x00000xxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, OpCodeSimdMemSs.Create); + SetA64("0x00110111xxxxxxxxxxxxxxxxxxxxxx", InstName.Ld__Vss, InstEmit.Ld__Vss, OpCodeSimdMemSs.Create); + SetA64("<<10110xx1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldp, InstEmit.Ldp, OpCodeSimdMemPair.Create); + SetA64("xx111100x10xxxxxxxxx00xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create); + SetA64("xx111100x10xxxxxxxxx01xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create); + SetA64("xx111100x10xxxxxxxxx11xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create); + SetA64("xx111101x1xxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemImm.Create); + SetA64("xx111100x11xxxxxx1xx10xxxxxxxxxx", InstName.Ldr, InstEmit.Ldr, OpCodeSimdMemReg.Create); + SetA64("xx011100xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldr_Literal, InstEmit.Ldr_Literal, OpCodeSimdMemLit.Create); + SetA64("0x001110<<1xxxxx100101xxxxxxxxxx", InstName.Mla_V, InstEmit.Mla_V, OpCodeSimdReg.Create); + SetA64("0x101111xxxxxxxx0000x0xxxxxxxxxx", InstName.Mla_Ve, InstEmit.Mla_Ve, OpCodeSimdRegElem.Create); + SetA64("0x101110<<1xxxxx100101xxxxxxxxxx", InstName.Mls_V, InstEmit.Mls_V, OpCodeSimdReg.Create); + SetA64("0x101111xxxxxxxx0100x0xxxxxxxxxx", InstName.Mls_Ve, InstEmit.Mls_Ve, OpCodeSimdRegElem.Create); + SetA64("0x00111100000xxx0xx001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create); + SetA64("0x00111100000xxx10x001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create); + SetA64("0x00111100000xxx110x01xxxxxxxxxx", 
InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create); + SetA64("0xx0111100000xxx111001xxxxxxxxxx", InstName.Movi_V, InstEmit.Movi_V, OpCodeSimdImm.Create); + SetA64("0x001110<<1xxxxx100111xxxxxxxxxx", InstName.Mul_V, InstEmit.Mul_V, OpCodeSimdReg.Create); + SetA64("0x001111xxxxxxxx1000x0xxxxxxxxxx", InstName.Mul_Ve, InstEmit.Mul_Ve, OpCodeSimdRegElem.Create); + SetA64("0x10111100000xxx0xx001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create); + SetA64("0x10111100000xxx10x001xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create); + SetA64("0x10111100000xxx110x01xxxxxxxxxx", InstName.Mvni_V, InstEmit.Mvni_V, OpCodeSimdImm.Create); + SetA64("0111111011100000101110xxxxxxxxxx", InstName.Neg_S, InstEmit.Neg_S, OpCodeSimd.Create); + SetA64("0>101110<<100000101110xxxxxxxxxx", InstName.Neg_V, InstEmit.Neg_V, OpCodeSimd.Create); + SetA64("0x10111000100000010110xxxxxxxxxx", InstName.Not_V, InstEmit.Not_V, OpCodeSimd.Create); + SetA64("0x001110111xxxxx000111xxxxxxxxxx", InstName.Orn_V, InstEmit.Orn_V, OpCodeSimdReg.Create); + SetA64("0x001110101xxxxx000111xxxxxxxxxx", InstName.Orr_V, InstEmit.Orr_V, OpCodeSimdReg.Create); + SetA64("0x00111100000xxx0xx101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, OpCodeSimdImm.Create); + SetA64("0x00111100000xxx10x101xxxxxxxxxx", InstName.Orr_Vi, InstEmit.Orr_Vi, OpCodeSimdImm.Create); + SetA64("0x001110001xxxxx111000xxxxxxxxxx", InstName.Pmull_V, InstEmit.Pmull_V, OpCodeSimdReg.Create); + SetA64("0x001110111xxxxx111000xxxxxxxxxx", InstName.Pmull_V, InstEmit.Pmull_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx010000xxxxxxxxxx", InstName.Raddhn_V, InstEmit.Raddhn_V, OpCodeSimdReg.Create); + SetA64("0x10111001100000010110xxxxxxxxxx", InstName.Rbit_V, InstEmit.Rbit_V, OpCodeSimd.Create); + SetA64("0x00111000100000000110xxxxxxxxxx", InstName.Rev16_V, InstEmit.Rev16_V, OpCodeSimd.Create); + SetA64("0x1011100x100000000010xxxxxxxxxx", InstName.Rev32_V, InstEmit.Rev32_V, OpCodeSimd.Create); + SetA64("0x001110<<100000000010xxxxxxxxxx", InstName.Rev64_V, InstEmit.Rev64_V, OpCodeSimd.Create); + SetA64("0x00111100>>>xxx100011xxxxxxxxxx", InstName.Rshrn_V, InstEmit.Rshrn_V, OpCodeSimdShImm.Create); + SetA64("0x101110<<1xxxxx011000xxxxxxxxxx", InstName.Rsubhn_V, InstEmit.Rsubhn_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx011111xxxxxxxxxx", InstName.Saba_V, InstEmit.Saba_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx010100xxxxxxxxxx", InstName.Sabal_V, InstEmit.Sabal_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx011101xxxxxxxxxx", InstName.Sabd_V, InstEmit.Sabd_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx011100xxxxxxxxxx", InstName.Sabdl_V, InstEmit.Sabdl_V, OpCodeSimdReg.Create); + SetA64("0x001110<<100000011010xxxxxxxxxx", InstName.Sadalp_V, InstEmit.Sadalp_V, OpCodeSimd.Create); + SetA64("0x001110<<1xxxxx000000xxxxxxxxxx", InstName.Saddl_V, InstEmit.Saddl_V, OpCodeSimdReg.Create); + SetA64("0x001110<<100000001010xxxxxxxxxx", InstName.Saddlp_V, InstEmit.Saddlp_V, OpCodeSimd.Create); + SetA64("000011100x110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, OpCodeSimd.Create); + SetA64("01001110<<110000001110xxxxxxxxxx", InstName.Saddlv_V, InstEmit.Saddlv_V, OpCodeSimd.Create); + SetA64("0x001110<<1xxxxx000100xxxxxxxxxx", InstName.Saddw_V, InstEmit.Saddw_V, OpCodeSimdReg.Create); + SetA64("x00111100x100010000000xxxxxxxxxx", InstName.Scvtf_Gp, InstEmit.Scvtf_Gp, OpCodeSimdCvt.Create); + SetA64(">00111100x000010>xxxxxxxxxxxxxxx", InstName.Scvtf_Gp_Fixed, InstEmit.Scvtf_Gp_Fixed, OpCodeSimdCvt.Create); + 
SetA64("010111100x100001110110xxxxxxxxxx", InstName.Scvtf_S, InstEmit.Scvtf_S, OpCodeSimd.Create); + SetA64("010111110>>xxxxx111001xxxxxxxxxx", InstName.Scvtf_S_Fixed, InstEmit.Scvtf_S_Fixed, OpCodeSimdShImm.Create); + SetA64("0>0011100<100001110110xxxxxxxxxx", InstName.Scvtf_V, InstEmit.Scvtf_V, OpCodeSimd.Create); + SetA64("0x001111001xxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, OpCodeSimdShImm.Create); + SetA64("0100111101xxxxxx111001xxxxxxxxxx", InstName.Scvtf_V_Fixed, InstEmit.Scvtf_V_Fixed, OpCodeSimdShImm.Create); + SetA64("01011110000xxxxx000000xxxxxxxxxx", InstName.Sha1c_V, InstEmit.Sha1c_V, OpCodeSimdReg.Create); + SetA64("0101111000101000000010xxxxxxxxxx", InstName.Sha1h_V, InstEmit.Sha1h_V, OpCodeSimd.Create); + SetA64("01011110000xxxxx001000xxxxxxxxxx", InstName.Sha1m_V, InstEmit.Sha1m_V, OpCodeSimdReg.Create); + SetA64("01011110000xxxxx000100xxxxxxxxxx", InstName.Sha1p_V, InstEmit.Sha1p_V, OpCodeSimdReg.Create); + SetA64("01011110000xxxxx001100xxxxxxxxxx", InstName.Sha1su0_V, InstEmit.Sha1su0_V, OpCodeSimdReg.Create); + SetA64("0101111000101000000110xxxxxxxxxx", InstName.Sha1su1_V, InstEmit.Sha1su1_V, OpCodeSimd.Create); + SetA64("01011110000xxxxx010000xxxxxxxxxx", InstName.Sha256h_V, InstEmit.Sha256h_V, OpCodeSimdReg.Create); + SetA64("01011110000xxxxx010100xxxxxxxxxx", InstName.Sha256h2_V, InstEmit.Sha256h2_V, OpCodeSimdReg.Create); + SetA64("0101111000101000001010xxxxxxxxxx", InstName.Sha256su0_V, InstEmit.Sha256su0_V, OpCodeSimd.Create); + SetA64("01011110000xxxxx011000xxxxxxxxxx", InstName.Sha256su1_V, InstEmit.Sha256su1_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx000001xxxxxxxxxx", InstName.Shadd_V, InstEmit.Shadd_V, OpCodeSimdReg.Create); + SetA64("0101111101xxxxxx010101xxxxxxxxxx", InstName.Shl_S, InstEmit.Shl_S, OpCodeSimdShImm.Create); + SetA64("0x00111100>>>xxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, OpCodeSimdShImm.Create); + SetA64("0100111101xxxxxx010101xxxxxxxxxx", InstName.Shl_V, InstEmit.Shl_V, OpCodeSimdShImm.Create); + SetA64("0x101110<<100001001110xxxxxxxxxx", InstName.Shll_V, InstEmit.Shll_V, OpCodeSimd.Create); + SetA64("0x00111100>>>xxx100001xxxxxxxxxx", InstName.Shrn_V, InstEmit.Shrn_V, OpCodeSimdShImm.Create); + SetA64("0x001110<<1xxxxx001001xxxxxxxxxx", InstName.Shsub_V, InstEmit.Shsub_V, OpCodeSimdReg.Create); + SetA64("0111111101xxxxxx010101xxxxxxxxxx", InstName.Sli_S, InstEmit.Sli_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, OpCodeSimdShImm.Create); + SetA64("0110111101xxxxxx010101xxxxxxxxxx", InstName.Sli_V, InstEmit.Sli_V, OpCodeSimdShImm.Create); + SetA64("0x001110<<1xxxxx011001xxxxxxxxxx", InstName.Smax_V, InstEmit.Smax_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx101001xxxxxxxxxx", InstName.Smaxp_V, InstEmit.Smaxp_V, OpCodeSimdReg.Create); + SetA64("000011100x110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, OpCodeSimd.Create); + SetA64("01001110<<110000101010xxxxxxxxxx", InstName.Smaxv_V, InstEmit.Smaxv_V, OpCodeSimd.Create); + SetA64("0x001110<<1xxxxx011011xxxxxxxxxx", InstName.Smin_V, InstEmit.Smin_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", InstName.Sminp_V, InstEmit.Sminp_V, OpCodeSimdReg.Create); + SetA64("000011100x110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, OpCodeSimd.Create); + SetA64("01001110<<110001101010xxxxxxxxxx", InstName.Sminv_V, InstEmit.Sminv_V, OpCodeSimd.Create); + SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", InstName.Smlal_V, InstEmit.Smlal_V, 
OpCodeSimdReg.Create); + SetA64("0x001111xxxxxxxx0010x0xxxxxxxxxx", InstName.Smlal_Ve, InstEmit.Smlal_Ve, OpCodeSimdRegElem.Create); + SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", InstName.Smlsl_V, InstEmit.Smlsl_V, OpCodeSimdReg.Create); + SetA64("0x001111xxxxxxxx0110x0xxxxxxxxxx", InstName.Smlsl_Ve, InstEmit.Smlsl_Ve, OpCodeSimdRegElem.Create); + SetA64("0x001110000xxxxx001011xxxxxxxxxx", InstName.Smov_S, InstEmit.Smov_S, OpCodeSimdIns.Create); + SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", InstName.Smull_V, InstEmit.Smull_V, OpCodeSimdReg.Create); + SetA64("0x001111xxxxxxxx1010x0xxxxxxxxxx", InstName.Smull_Ve, InstEmit.Smull_Ve, OpCodeSimdRegElem.Create); + SetA64("01011110xx100000011110xxxxxxxxxx", InstName.Sqabs_S, InstEmit.Sqabs_S, OpCodeSimd.Create); + SetA64("0>001110<<100000011110xxxxxxxxxx", InstName.Sqabs_V, InstEmit.Sqabs_V, OpCodeSimd.Create); + SetA64("01011110xx1xxxxx000011xxxxxxxxxx", InstName.Sqadd_S, InstEmit.Sqadd_S, OpCodeSimdReg.Create); + SetA64("0>001110<<1xxxxx000011xxxxxxxxxx", InstName.Sqadd_V, InstEmit.Sqadd_V, OpCodeSimdReg.Create); + SetA64("01011110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, OpCodeSimdReg.Create); + SetA64("01011110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_S, InstEmit.Sqdmulh_S, OpCodeSimdReg.Create); + SetA64("0x001110011xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, OpCodeSimdReg.Create); + SetA64("0x001110101xxxxx101101xxxxxxxxxx", InstName.Sqdmulh_V, InstEmit.Sqdmulh_V, OpCodeSimdReg.Create); + SetA64("0x00111101xxxxxx1100x0xxxxxxxxxx", InstName.Sqdmulh_Ve, InstEmit.Sqdmulh_Ve, OpCodeSimdRegElem.Create); + SetA64("0x00111110xxxxxx1100x0xxxxxxxxxx", InstName.Sqdmulh_Ve, InstEmit.Sqdmulh_Ve, OpCodeSimdRegElem.Create); + SetA64("01111110xx100000011110xxxxxxxxxx", InstName.Sqneg_S, InstEmit.Sqneg_S, OpCodeSimd.Create); + SetA64("0>101110<<100000011110xxxxxxxxxx", InstName.Sqneg_V, InstEmit.Sqneg_V, OpCodeSimd.Create); + SetA64("01111110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, OpCodeSimdReg.Create); + SetA64("01111110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_S, InstEmit.Sqrdmulh_S, OpCodeSimdReg.Create); + SetA64("0x101110011xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, OpCodeSimdReg.Create); + SetA64("0x101110101xxxxx101101xxxxxxxxxx", InstName.Sqrdmulh_V, InstEmit.Sqrdmulh_V, OpCodeSimdReg.Create); + SetA64("0x00111101xxxxxx1101x0xxxxxxxxxx", InstName.Sqrdmulh_Ve, InstEmit.Sqrdmulh_Ve, OpCodeSimdRegElem.Create); + SetA64("0x00111110xxxxxx1101x0xxxxxxxxxx", InstName.Sqrdmulh_Ve, InstEmit.Sqrdmulh_Ve, OpCodeSimdRegElem.Create); + SetA64("0>001110<<1xxxxx010111xxxxxxxxxx", InstName.Sqrshl_V, InstEmit.Sqrshl_V, OpCodeSimdReg.Create); + SetA64("0101111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_S, InstEmit.Sqrshrn_S, OpCodeSimdShImm.Create); + SetA64("0x00111100>>>xxx100111xxxxxxxxxx", InstName.Sqrshrn_V, InstEmit.Sqrshrn_V, OpCodeSimdShImm.Create); + SetA64("0111111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_S, InstEmit.Sqrshrun_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx100011xxxxxxxxxx", InstName.Sqrshrun_V, InstEmit.Sqrshrun_V, OpCodeSimdShImm.Create); + SetA64("0>001110<<1xxxxx010011xxxxxxxxxx", InstName.Sqshl_V, InstEmit.Sqshl_V, OpCodeSimdReg.Create); + SetA64("0101111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_S, InstEmit.Sqshrn_S, OpCodeSimdShImm.Create); + SetA64("0x00111100>>>xxx100101xxxxxxxxxx", InstName.Sqshrn_V, InstEmit.Sqshrn_V, OpCodeSimdShImm.Create); + SetA64("0111111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_S, 
InstEmit.Sqshrun_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx100001xxxxxxxxxx", InstName.Sqshrun_V, InstEmit.Sqshrun_V, OpCodeSimdShImm.Create); + SetA64("01011110xx1xxxxx001011xxxxxxxxxx", InstName.Sqsub_S, InstEmit.Sqsub_S, OpCodeSimdReg.Create); + SetA64("0>001110<<1xxxxx001011xxxxxxxxxx", InstName.Sqsub_V, InstEmit.Sqsub_V, OpCodeSimdReg.Create); + SetA64("01011110<<100001010010xxxxxxxxxx", InstName.Sqxtn_S, InstEmit.Sqxtn_S, OpCodeSimd.Create); + SetA64("0x001110<<100001010010xxxxxxxxxx", InstName.Sqxtn_V, InstEmit.Sqxtn_V, OpCodeSimd.Create); + SetA64("01111110<<100001001010xxxxxxxxxx", InstName.Sqxtun_S, InstEmit.Sqxtun_S, OpCodeSimd.Create); + SetA64("0x101110<<100001001010xxxxxxxxxx", InstName.Sqxtun_V, InstEmit.Sqxtun_V, OpCodeSimd.Create); + SetA64("0x001110<<1xxxxx000101xxxxxxxxxx", InstName.Srhadd_V, InstEmit.Srhadd_V, OpCodeSimdReg.Create); + SetA64("0111111101xxxxxx010001xxxxxxxxxx", InstName.Sri_S, InstEmit.Sri_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, OpCodeSimdShImm.Create); + SetA64("0110111101xxxxxx010001xxxxxxxxxx", InstName.Sri_V, InstEmit.Sri_V, OpCodeSimdShImm.Create); + SetA64("0>001110<<1xxxxx010101xxxxxxxxxx", InstName.Srshl_V, InstEmit.Srshl_V, OpCodeSimdReg.Create); + SetA64("0101111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_S, InstEmit.Srshr_S, OpCodeSimdShImm.Create); + SetA64("0x00111100>>>xxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, OpCodeSimdShImm.Create); + SetA64("0100111101xxxxxx001001xxxxxxxxxx", InstName.Srshr_V, InstEmit.Srshr_V, OpCodeSimdShImm.Create); + SetA64("0101111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_S, InstEmit.Srsra_S, OpCodeSimdShImm.Create); + SetA64("0x00111100>>>xxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, OpCodeSimdShImm.Create); + SetA64("0100111101xxxxxx001101xxxxxxxxxx", InstName.Srsra_V, InstEmit.Srsra_V, OpCodeSimdShImm.Create); + SetA64("01011110111xxxxx010001xxxxxxxxxx", InstName.Sshl_S, InstEmit.Sshl_S, OpCodeSimdReg.Create); + SetA64("0>001110<<1xxxxx010001xxxxxxxxxx", InstName.Sshl_V, InstEmit.Sshl_V, OpCodeSimdReg.Create); + SetA64("0x00111100>>>xxx101001xxxxxxxxxx", InstName.Sshll_V, InstEmit.Sshll_V, OpCodeSimdShImm.Create); + SetA64("0101111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_S, InstEmit.Sshr_S, OpCodeSimdShImm.Create); + SetA64("0x00111100>>>xxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, OpCodeSimdShImm.Create); + SetA64("0100111101xxxxxx000001xxxxxxxxxx", InstName.Sshr_V, InstEmit.Sshr_V, OpCodeSimdShImm.Create); + SetA64("0101111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_S, InstEmit.Ssra_S, OpCodeSimdShImm.Create); + SetA64("0x00111100>>>xxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, OpCodeSimdShImm.Create); + SetA64("0100111101xxxxxx000101xxxxxxxxxx", InstName.Ssra_V, InstEmit.Ssra_V, OpCodeSimdShImm.Create); + SetA64("0x001110<<1xxxxx001000xxxxxxxxxx", InstName.Ssubl_V, InstEmit.Ssubl_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", InstName.Ssubw_V, InstEmit.Ssubw_V, OpCodeSimdReg.Create); + SetA64("0x00110000000000xxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, OpCodeSimdMemMs.Create); + SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", InstName.St__Vms, InstEmit.St__Vms, OpCodeSimdMemMs.Create); + SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, OpCodeSimdMemSs.Create); + SetA64("0x00110110xxxxxxxxxxxxxxxxxxxxxx", InstName.St__Vss, InstEmit.St__Vss, OpCodeSimdMemSs.Create); + SetA64("<<10110xx0xxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, 
InstEmit.Stp, OpCodeSimdMemPair.Create); + SetA64("xx111100x00xxxxxxxxx00xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create); + SetA64("xx111100x00xxxxxxxxx01xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create); + SetA64("xx111100x00xxxxxxxxx11xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create); + SetA64("xx111101x0xxxxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemImm.Create); + SetA64("xx111100x01xxxxxx1xx10xxxxxxxxxx", InstName.Str, InstEmit.Str, OpCodeSimdMemReg.Create); + SetA64("01111110111xxxxx100001xxxxxxxxxx", InstName.Sub_S, InstEmit.Sub_S, OpCodeSimdReg.Create); + SetA64("0>101110<<1xxxxx100001xxxxxxxxxx", InstName.Sub_V, InstEmit.Sub_V, OpCodeSimdReg.Create); + SetA64("0x001110<<1xxxxx011000xxxxxxxxxx", InstName.Subhn_V, InstEmit.Subhn_V, OpCodeSimdReg.Create); + SetA64("01011110xx100000001110xxxxxxxxxx", InstName.Suqadd_S, InstEmit.Suqadd_S, OpCodeSimd.Create); + SetA64("0>001110<<100000001110xxxxxxxxxx", InstName.Suqadd_V, InstEmit.Suqadd_V, OpCodeSimd.Create); + SetA64("0x001110000xxxxx0xx000xxxxxxxxxx", InstName.Tbl_V, InstEmit.Tbl_V, OpCodeSimdTbl.Create); + SetA64("0x001110000xxxxx0xx100xxxxxxxxxx", InstName.Tbx_V, InstEmit.Tbx_V, OpCodeSimdTbl.Create); + SetA64("0>001110<<0xxxxx001010xxxxxxxxxx", InstName.Trn1_V, InstEmit.Trn1_V, OpCodeSimdReg.Create); + SetA64("0>001110<<0xxxxx011010xxxxxxxxxx", InstName.Trn2_V, InstEmit.Trn2_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx011111xxxxxxxxxx", InstName.Uaba_V, InstEmit.Uaba_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx010100xxxxxxxxxx", InstName.Uabal_V, InstEmit.Uabal_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx011101xxxxxxxxxx", InstName.Uabd_V, InstEmit.Uabd_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx011100xxxxxxxxxx", InstName.Uabdl_V, InstEmit.Uabdl_V, OpCodeSimdReg.Create); + SetA64("0x101110<<100000011010xxxxxxxxxx", InstName.Uadalp_V, InstEmit.Uadalp_V, OpCodeSimd.Create); + SetA64("0x101110<<1xxxxx000000xxxxxxxxxx", InstName.Uaddl_V, InstEmit.Uaddl_V, OpCodeSimdReg.Create); + SetA64("0x101110<<100000001010xxxxxxxxxx", InstName.Uaddlp_V, InstEmit.Uaddlp_V, OpCodeSimd.Create); + SetA64("001011100x110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, OpCodeSimd.Create); + SetA64("01101110<<110000001110xxxxxxxxxx", InstName.Uaddlv_V, InstEmit.Uaddlv_V, OpCodeSimd.Create); + SetA64("0x101110<<1xxxxx000100xxxxxxxxxx", InstName.Uaddw_V, InstEmit.Uaddw_V, OpCodeSimdReg.Create); + SetA64("x00111100x100011000000xxxxxxxxxx", InstName.Ucvtf_Gp, InstEmit.Ucvtf_Gp, OpCodeSimdCvt.Create); + SetA64(">00111100x000011>xxxxxxxxxxxxxxx", InstName.Ucvtf_Gp_Fixed, InstEmit.Ucvtf_Gp_Fixed, OpCodeSimdCvt.Create); + SetA64("011111100x100001110110xxxxxxxxxx", InstName.Ucvtf_S, InstEmit.Ucvtf_S, OpCodeSimd.Create); + SetA64("011111110>>xxxxx111001xxxxxxxxxx", InstName.Ucvtf_S_Fixed, InstEmit.Ucvtf_S_Fixed, OpCodeSimdShImm.Create); + SetA64("0>1011100<100001110110xxxxxxxxxx", InstName.Ucvtf_V, InstEmit.Ucvtf_V, OpCodeSimd.Create); + SetA64("0x101111001xxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, OpCodeSimdShImm.Create); + SetA64("0110111101xxxxxx111001xxxxxxxxxx", InstName.Ucvtf_V_Fixed, InstEmit.Ucvtf_V_Fixed, OpCodeSimdShImm.Create); + SetA64("0x101110<<1xxxxx000001xxxxxxxxxx", InstName.Uhadd_V, InstEmit.Uhadd_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx001001xxxxxxxxxx", InstName.Uhsub_V, InstEmit.Uhsub_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx011001xxxxxxxxxx", InstName.Umax_V, InstEmit.Umax_V, 
OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx101001xxxxxxxxxx", InstName.Umaxp_V, InstEmit.Umaxp_V, OpCodeSimdReg.Create); + SetA64("001011100x110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, OpCodeSimd.Create); + SetA64("01101110<<110000101010xxxxxxxxxx", InstName.Umaxv_V, InstEmit.Umaxv_V, OpCodeSimd.Create); + SetA64("0x101110<<1xxxxx011011xxxxxxxxxx", InstName.Umin_V, InstEmit.Umin_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx101011xxxxxxxxxx", InstName.Uminp_V, InstEmit.Uminp_V, OpCodeSimdReg.Create); + SetA64("001011100x110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, OpCodeSimd.Create); + SetA64("01101110<<110001101010xxxxxxxxxx", InstName.Uminv_V, InstEmit.Uminv_V, OpCodeSimd.Create); + SetA64("0x101110<<1xxxxx100000xxxxxxxxxx", InstName.Umlal_V, InstEmit.Umlal_V, OpCodeSimdReg.Create); + SetA64("0x101111xxxxxxxx0010x0xxxxxxxxxx", InstName.Umlal_Ve, InstEmit.Umlal_Ve, OpCodeSimdRegElem.Create); + SetA64("0x101110<<1xxxxx101000xxxxxxxxxx", InstName.Umlsl_V, InstEmit.Umlsl_V, OpCodeSimdReg.Create); + SetA64("0x101111xxxxxxxx0110x0xxxxxxxxxx", InstName.Umlsl_Ve, InstEmit.Umlsl_Ve, OpCodeSimdRegElem.Create); + SetA64("0x001110000xxxxx001111xxxxxxxxxx", InstName.Umov_S, InstEmit.Umov_S, OpCodeSimdIns.Create); + SetA64("0x101110<<1xxxxx110000xxxxxxxxxx", InstName.Umull_V, InstEmit.Umull_V, OpCodeSimdReg.Create); + SetA64("0x101111xxxxxxxx1010x0xxxxxxxxxx", InstName.Umull_Ve, InstEmit.Umull_Ve, OpCodeSimdRegElem.Create); + SetA64("01111110xx1xxxxx000011xxxxxxxxxx", InstName.Uqadd_S, InstEmit.Uqadd_S, OpCodeSimdReg.Create); + SetA64("0>101110<<1xxxxx000011xxxxxxxxxx", InstName.Uqadd_V, InstEmit.Uqadd_V, OpCodeSimdReg.Create); + SetA64("0>101110<<1xxxxx010111xxxxxxxxxx", InstName.Uqrshl_V, InstEmit.Uqrshl_V, OpCodeSimdReg.Create); + SetA64("0111111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_S, InstEmit.Uqrshrn_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx100111xxxxxxxxxx", InstName.Uqrshrn_V, InstEmit.Uqrshrn_V, OpCodeSimdShImm.Create); + SetA64("0>101110<<1xxxxx010011xxxxxxxxxx", InstName.Uqshl_V, InstEmit.Uqshl_V, OpCodeSimdReg.Create); + SetA64("0111111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_S, InstEmit.Uqshrn_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx100101xxxxxxxxxx", InstName.Uqshrn_V, InstEmit.Uqshrn_V, OpCodeSimdShImm.Create); + SetA64("01111110xx1xxxxx001011xxxxxxxxxx", InstName.Uqsub_S, InstEmit.Uqsub_S, OpCodeSimdReg.Create); + SetA64("0>101110<<1xxxxx001011xxxxxxxxxx", InstName.Uqsub_V, InstEmit.Uqsub_V, OpCodeSimdReg.Create); + SetA64("01111110<<100001010010xxxxxxxxxx", InstName.Uqxtn_S, InstEmit.Uqxtn_S, OpCodeSimd.Create); + SetA64("0x101110<<100001010010xxxxxxxxxx", InstName.Uqxtn_V, InstEmit.Uqxtn_V, OpCodeSimd.Create); + SetA64("0x101110<<1xxxxx000101xxxxxxxxxx", InstName.Urhadd_V, InstEmit.Urhadd_V, OpCodeSimdReg.Create); + SetA64("0>101110<<1xxxxx010101xxxxxxxxxx", InstName.Urshl_V, InstEmit.Urshl_V, OpCodeSimdReg.Create); + SetA64("0111111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_S, InstEmit.Urshr_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, OpCodeSimdShImm.Create); + SetA64("0110111101xxxxxx001001xxxxxxxxxx", InstName.Urshr_V, InstEmit.Urshr_V, OpCodeSimdShImm.Create); + SetA64("0111111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_S, InstEmit.Ursra_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx001101xxxxxxxxxx", InstName.Ursra_V, InstEmit.Ursra_V, OpCodeSimdShImm.Create); + SetA64("0110111101xxxxxx001101xxxxxxxxxx", InstName.Ursra_V, 
InstEmit.Ursra_V, OpCodeSimdShImm.Create); + SetA64("01111110111xxxxx010001xxxxxxxxxx", InstName.Ushl_S, InstEmit.Ushl_S, OpCodeSimdReg.Create); + SetA64("0>101110<<1xxxxx010001xxxxxxxxxx", InstName.Ushl_V, InstEmit.Ushl_V, OpCodeSimdReg.Create); + SetA64("0x10111100>>>xxx101001xxxxxxxxxx", InstName.Ushll_V, InstEmit.Ushll_V, OpCodeSimdShImm.Create); + SetA64("0111111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_S, InstEmit.Ushr_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, OpCodeSimdShImm.Create); + SetA64("0110111101xxxxxx000001xxxxxxxxxx", InstName.Ushr_V, InstEmit.Ushr_V, OpCodeSimdShImm.Create); + SetA64("01111110xx100000001110xxxxxxxxxx", InstName.Usqadd_S, InstEmit.Usqadd_S, OpCodeSimd.Create); + SetA64("0>101110<<100000001110xxxxxxxxxx", InstName.Usqadd_V, InstEmit.Usqadd_V, OpCodeSimd.Create); + SetA64("0111111101xxxxxx000101xxxxxxxxxx", InstName.Usra_S, InstEmit.Usra_S, OpCodeSimdShImm.Create); + SetA64("0x10111100>>>xxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, OpCodeSimdShImm.Create); + SetA64("0110111101xxxxxx000101xxxxxxxxxx", InstName.Usra_V, InstEmit.Usra_V, OpCodeSimdShImm.Create); + SetA64("0x101110<<1xxxxx001000xxxxxxxxxx", InstName.Usubl_V, InstEmit.Usubl_V, OpCodeSimdReg.Create); + SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", InstName.Usubw_V, InstEmit.Usubw_V, OpCodeSimdReg.Create); + SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", InstName.Uzp1_V, InstEmit.Uzp1_V, OpCodeSimdReg.Create); + SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", InstName.Uzp2_V, InstEmit.Uzp2_V, OpCodeSimdReg.Create); + SetA64("0x001110<<100001001010xxxxxxxxxx", InstName.Xtn_V, InstEmit.Xtn_V, OpCodeSimd.Create); + SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", InstName.Zip1_V, InstEmit.Zip1_V, OpCodeSimdReg.Create); + SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", InstName.Zip2_V, InstEmit.Zip2_V, OpCodeSimdReg.Create); +#endregion + +#region "OpCode Table (AArch32, A32)" + // Base + SetA32("<<<<0010101xxxxxxxxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluImm.Create); + SetA32("<<<<0000101xxxxxxxxxxxxxxxx0xxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluRsImm.Create); + SetA32("<<<<0000101xxxxxxxxxxxxx0xx1xxxx", InstName.Adc, InstEmit32.Adc, OpCode32AluRsReg.Create); + SetA32("<<<<0010100xxxxxxxxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCode32AluImm.Create); + SetA32("<<<<0000100xxxxxxxxxxxxxxxx0xxxx", InstName.Add, InstEmit32.Add, OpCode32AluRsImm.Create); + SetA32("<<<<0000100xxxxxxxxxxxxx0xx1xxxx", InstName.Add, InstEmit32.Add, OpCode32AluRsReg.Create); + SetA32("<<<<0010000xxxxxxxxxxxxxxxxxxxxx", InstName.And, InstEmit32.And, OpCode32AluImm.Create); + SetA32("<<<<0000000xxxxxxxxxxxxxxxx0xxxx", InstName.And, InstEmit32.And, OpCode32AluRsImm.Create); + SetA32("<<<<0000000xxxxxxxxxxxxx0xx1xxxx", InstName.And, InstEmit32.And, OpCode32AluRsReg.Create); + SetA32("<<<<1010xxxxxxxxxxxxxxxxxxxxxxxx", InstName.B, InstEmit32.B, OpCode32BImm.Create); + SetA32("<<<<0111110xxxxxxxxxxxxxx0011111", InstName.Bfc, InstEmit32.Bfc, OpCode32AluBf.Create); + SetA32("<<<<0111110xxxxxxxxxxxxxx001xxxx", InstName.Bfi, InstEmit32.Bfi, OpCode32AluBf.Create); + SetA32("<<<<0011110xxxxxxxxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluImm.Create); + SetA32("<<<<0001110xxxxxxxxxxxxxxxx0xxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluRsImm.Create); + SetA32("<<<<0001110xxxxxxxxxxxxx0xx1xxxx", InstName.Bic, InstEmit32.Bic, OpCode32AluRsReg.Create); + SetA32("<<<<1011xxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, OpCode32BImm.Create); + 
SetA32("1111101xxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Blx, InstEmit32.Blx, OpCode32BImm.Create); + SetA32("<<<<000100101111111111110011xxxx", InstName.Blx, InstEmit32.Blxr, OpCode32BReg.Create); + SetA32("<<<<000100101111111111110001xxxx", InstName.Bx, InstEmit32.Bx, OpCode32BReg.Create); + SetA32("11110101011111111111000000011111", InstName.Clrex, InstEmit32.Clrex, OpCode32.Create); + SetA32("<<<<000101101111xxxx11110001xxxx", InstName.Clz, InstEmit32.Clz, OpCode32AluReg.Create); + SetA32("<<<<00110111xxxx0000xxxxxxxxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluImm.Create); + SetA32("<<<<00010111xxxx0000xxxxxxx0xxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluRsImm.Create); + SetA32("<<<<00010111xxxx0000xxxx0xx1xxxx", InstName.Cmn, InstEmit32.Cmn, OpCode32AluRsReg.Create); + SetA32("<<<<00110101xxxx0000xxxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluImm.Create); + SetA32("<<<<00010101xxxx0000xxxxxxx0xxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluRsImm.Create); + SetA32("<<<<00010101xxxx0000xxxx0xx1xxxx", InstName.Cmp, InstEmit32.Cmp, OpCode32AluRsReg.Create); + SetA32("<<<<00010000xxxxxxxx00000100xxxx", InstName.Crc32b, InstEmit32.Crc32b, OpCode32AluReg.Create); + SetA32("<<<<00010000xxxxxxxx00100100xxxx", InstName.Crc32cb, InstEmit32.Crc32cb, OpCode32AluReg.Create); + SetA32("<<<<00010010xxxxxxxx00100100xxxx", InstName.Crc32ch, InstEmit32.Crc32ch, OpCode32AluReg.Create); + SetA32("<<<<00010100xxxxxxxx00100100xxxx", InstName.Crc32cw, InstEmit32.Crc32cw, OpCode32AluReg.Create); + SetA32("<<<<00010010xxxxxxxx00000100xxxx", InstName.Crc32h, InstEmit32.Crc32h, OpCode32AluReg.Create); + SetA32("<<<<00010100xxxxxxxx00000100xxxx", InstName.Crc32w, InstEmit32.Crc32w, OpCode32AluReg.Create); + SetA32("<<<<0011001000001111000000010100", InstName.Csdb, InstEmit32.Csdb, OpCode32.Create); + SetA32("1111010101111111111100000101xxxx", InstName.Dmb, InstEmit32.Dmb, OpCode32.Create); + SetA32("1111010101111111111100000100xxxx", InstName.Dsb, InstEmit32.Dsb, OpCode32.Create); + SetA32("<<<<0010001xxxxxxxxxxxxxxxxxxxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluImm.Create); + SetA32("<<<<0000001xxxxxxxxxxxxxxxx0xxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluRsImm.Create); + SetA32("<<<<0000001xxxxxxxxxxxxx0xx1xxxx", InstName.Eor, InstEmit32.Eor, OpCode32AluRsReg.Create); + SetA32("<<<<0011001000001111000000010000", InstName.Esb, InstEmit32.Nop, OpCode32.Create); // Error Synchronization Barrier (FEAT_RAS) + SetA32("<<<<001100100000111100000000011x", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<0011001000001111000000001xxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<0011001000001111000000010001", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<0011001000001111000000010011", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<0011001000001111000000010101", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<001100100000111100000001011x", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<0011001000001111000000011xxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<00110010000011110000001xxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<0011001000001111000001xxxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + SetA32("<<<<001100100000111100001xxxxxxx", InstName.Hint, InstEmit32.Nop, OpCode32.Create); // Reserved Hint + 
SetA32("1111010101111111111100000110xxxx", InstName.Isb, InstEmit32.Nop, OpCode32.Create); + SetA32("<<<<00011001xxxxxxxx110010011111", InstName.Lda, InstEmit32.Lda, OpCode32MemLdEx.Create); + SetA32("<<<<00011101xxxxxxxx110010011111", InstName.Ldab, InstEmit32.Ldab, OpCode32MemLdEx.Create); + SetA32("<<<<00011001xxxxxxxx111010011111", InstName.Ldaex, InstEmit32.Ldaex, OpCode32MemLdEx.Create); + SetA32("<<<<00011101xxxxxxxx111010011111", InstName.Ldaexb, InstEmit32.Ldaexb, OpCode32MemLdEx.Create); + SetA32("<<<<00011011xxxxxxxx111010011111", InstName.Ldaexd, InstEmit32.Ldaexd, OpCode32MemLdEx.Create); + SetA32("<<<<00011111xxxxxxxx111010011111", InstName.Ldaexh, InstEmit32.Ldaexh, OpCode32MemLdEx.Create); + SetA32("<<<<00011111xxxxxxxx110010011111", InstName.Ldah, InstEmit32.Ldah, OpCode32MemLdEx.Create); + SetA32("<<<<100xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCode32MemMult.Create); + SetA32("<<<<010xx0x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCode32MemImm.Create); + SetA32("<<<<011xx0x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldr, InstEmit32.Ldr, OpCode32MemRsImm.Create); + SetA32("<<<<010xx1x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCode32MemImm.Create); + SetA32("<<<<011xx1x1xxxxxxxxxxxxxxx0xxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCode32MemRsImm.Create); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1101xxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCode32MemImm8.Create); + SetA32("<<<<000xx0x0xxxxxxxx00001101xxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCode32MemReg.Create); + SetA32("<<<<00011001xxxxxxxx111110011111", InstName.Ldrex, InstEmit32.Ldrex, OpCode32MemLdEx.Create); + SetA32("<<<<00011101xxxxxxxx111110011111", InstName.Ldrexb, InstEmit32.Ldrexb, OpCode32MemLdEx.Create); + SetA32("<<<<00011011xxxxxxxx111110011111", InstName.Ldrexd, InstEmit32.Ldrexd, OpCode32MemLdEx.Create); + SetA32("<<<<00011111xxxxxxxx111110011111", InstName.Ldrexh, InstEmit32.Ldrexh, OpCode32MemLdEx.Create); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1011xxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCode32MemImm8.Create); + SetA32("<<<<000xx0x1xxxxxxxx00001011xxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCode32MemReg.Create); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCode32MemImm8.Create); + SetA32("<<<<000xx0x1xxxxxxxx00001101xxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCode32MemReg.Create); + SetA32("<<<<000xx1x1xxxxxxxxxxxx1111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCode32MemImm8.Create); + SetA32("<<<<000xx0x1xxxxxxxx00001111xxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCode32MemReg.Create); + SetA32("<<<<1110xxx0xxxxxxxx111xxxx1xxxx", InstName.Mcr, InstEmit32.Mcr, OpCode32System.Create); + SetA32("<<<<0000001xxxxxxxxxxxxx1001xxxx", InstName.Mla, InstEmit32.Mla, OpCode32AluMla.Create); + SetA32("<<<<00000110xxxxxxxxxxxx1001xxxx", InstName.Mls, InstEmit32.Mls, OpCode32AluMla.Create); + SetA32("<<<<0011101x0000xxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluImm.Create); + SetA32("<<<<0001101x0000xxxxxxxxxxx0xxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluRsImm.Create); + SetA32("<<<<0001101x0000xxxxxxxx0xx1xxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluRsReg.Create); + SetA32("<<<<00110000xxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCode32AluImm16.Create); + SetA32("<<<<00110100xxxxxxxxxxxxxxxxxxxx", InstName.Movt, InstEmit32.Movt, OpCode32AluImm16.Create); + SetA32("<<<<1110xxx1xxxxxxxx111xxxx1xxxx", InstName.Mrc, InstEmit32.Mrc, OpCode32System.Create); + SetA32("<<<<11000101xxxxxxxx111xxxxxxxxx", InstName.Mrrc, InstEmit32.Mrrc, 
OpCode32System.Create); + SetA32("<<<<00010x001111xxxx000000000000", InstName.Mrs, InstEmit32.Mrs, OpCode32Mrs.Create); + SetA32("<<<<00010x10xxxx111100000000xxxx", InstName.Msr, InstEmit32.Msr, OpCode32MsrReg.Create); + SetA32("<<<<0000000xxxxx0000xxxx1001xxxx", InstName.Mul, InstEmit32.Mul, OpCode32AluMla.Create); + SetA32("<<<<0011111x0000xxxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluImm.Create); + SetA32("<<<<0001111x0000xxxxxxxxxxx0xxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluRsImm.Create); + SetA32("<<<<0001111x0000xxxxxxxx0xx1xxxx", InstName.Mvn, InstEmit32.Mvn, OpCode32AluRsReg.Create); + SetA32("<<<<0011001000001111000000000000", InstName.Nop, InstEmit32.Nop, OpCode32.Create); + SetA32("<<<<0011100xxxxxxxxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluImm.Create); + SetA32("<<<<0001100xxxxxxxxxxxxxxxx0xxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluRsImm.Create); + SetA32("<<<<0001100xxxxxxxxxxxxx0xx1xxxx", InstName.Orr, InstEmit32.Orr, OpCode32AluRsReg.Create); + SetA32("<<<<01101000xxxxxxxxxxxxxx01xxxx", InstName.Pkh, InstEmit32.Pkh, OpCode32AluRsImm.Create); + SetA32("11110101xx01xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create); + SetA32("11110111xx01xxxx1111xxxxxxx0xxxx", InstName.Pld, InstEmit32.Nop, OpCode32.Create); + SetA32("<<<<011011111111xxxx11110011xxxx", InstName.Rbit, InstEmit32.Rbit, OpCode32AluReg.Create); + SetA32("<<<<011010111111xxxx11110011xxxx", InstName.Rev, InstEmit32.Rev, OpCode32AluReg.Create); + SetA32("<<<<011010111111xxxx11111011xxxx", InstName.Rev16, InstEmit32.Rev16, OpCode32AluReg.Create); + SetA32("<<<<011011111111xxxx11111011xxxx", InstName.Revsh, InstEmit32.Revsh, OpCode32AluReg.Create); + SetA32("<<<<0010011xxxxxxxxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluImm.Create); + SetA32("<<<<0000011xxxxxxxxxxxxxxxx0xxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluRsImm.Create); + SetA32("<<<<0000011xxxxxxxxxxxxx0xx1xxxx", InstName.Rsb, InstEmit32.Rsb, OpCode32AluRsReg.Create); + SetA32("<<<<0010111xxxxxxxxxxxxxxxxxxxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluImm.Create); + SetA32("<<<<0000111xxxxxxxxxxxxxxxx0xxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluRsImm.Create); + SetA32("<<<<0000111xxxxxxxxxxxxx0xx1xxxx", InstName.Rsc, InstEmit32.Rsc, OpCode32AluRsReg.Create); + SetA32("<<<<01100001xxxxxxxx11111001xxxx", InstName.Sadd8, InstEmit32.Sadd8, OpCode32AluReg.Create); + SetA32("<<<<0010110xxxxxxxxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluImm.Create); + SetA32("<<<<0000110xxxxxxxxxxxxxxxx0xxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluRsImm.Create); + SetA32("<<<<0000110xxxxxxxxxxxxx0xx1xxxx", InstName.Sbc, InstEmit32.Sbc, OpCode32AluRsReg.Create); + SetA32("<<<<0111101xxxxxxxxxxxxxx101xxxx", InstName.Sbfx, InstEmit32.Sbfx, OpCode32AluBf.Create); + SetA32("<<<<01110001xxxx1111xxxx0001xxxx", InstName.Sdiv, InstEmit32.Sdiv, OpCode32AluMla.Create); + SetA32("<<<<01101000xxxxxxxx11111011xxxx", InstName.Sel, InstEmit32.Sel, OpCode32AluReg.Create); + SetA32("<<<<0011001000001111000000000100", InstName.Sev, InstEmit32.Nop, OpCode32.Create); + SetA32("<<<<0011001000001111000000000101", InstName.Sevl, InstEmit32.Nop, OpCode32.Create); + SetA32("<<<<01100011xxxxxxxx11111001xxxx", InstName.Shadd8, InstEmit32.Shadd8, OpCode32AluReg.Create); + SetA32("<<<<01100011xxxxxxxx11111111xxxx", InstName.Shsub8, InstEmit32.Shsub8, OpCode32AluReg.Create); + SetA32("<<<<00010000xxxxxxxxxxxx1xx0xxxx", InstName.Smla__, InstEmit32.Smla__, OpCode32AluMla.Create); + 
SetA32("<<<<0000111xxxxxxxxxxxxx1001xxxx", InstName.Smlal, InstEmit32.Smlal, OpCode32AluUmull.Create); + SetA32("<<<<00010100xxxxxxxxxxxx1xx0xxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCode32AluUmull.Create); + SetA32("<<<<00010010xxxxxxxxxxxx1x00xxxx", InstName.Smlaw_, InstEmit32.Smlaw_, OpCode32AluMla.Create); + SetA32("<<<<01110101xxxxxxxxxxxx00x1xxxx", InstName.Smmla, InstEmit32.Smmla, OpCode32AluMla.Create); + SetA32("<<<<01110101xxxxxxxxxxxx11x1xxxx", InstName.Smmls, InstEmit32.Smmls, OpCode32AluMla.Create); + SetA32("<<<<00010110xxxxxxxxxxxx1xx0xxxx", InstName.Smul__, InstEmit32.Smul__, OpCode32AluMla.Create); + SetA32("<<<<0000110xxxxxxxxxxxxx1001xxxx", InstName.Smull, InstEmit32.Smull, OpCode32AluUmull.Create); + SetA32("<<<<00010010xxxx0000xxxx1x10xxxx", InstName.Smulw_, InstEmit32.Smulw_, OpCode32AluMla.Create); + SetA32("<<<<0110101xxxxxxxxxxxxxxx01xxxx", InstName.Ssat, InstEmit32.Ssat, OpCode32Sat.Create); + SetA32("<<<<01101010xxxxxxxx11110011xxxx", InstName.Ssat16, InstEmit32.Ssat16, OpCode32Sat16.Create); + SetA32("<<<<01100001xxxxxxxx11111111xxxx", InstName.Ssub8, InstEmit32.Ssub8, OpCode32AluReg.Create); + SetA32("<<<<00011000xxxx111111001001xxxx", InstName.Stl, InstEmit32.Stl, OpCode32MemStEx.Create); + SetA32("<<<<00011100xxxx111111001001xxxx", InstName.Stlb, InstEmit32.Stlb, OpCode32MemStEx.Create); + SetA32("<<<<00011000xxxxxxxx11101001xxxx", InstName.Stlex, InstEmit32.Stlex, OpCode32MemStEx.Create); + SetA32("<<<<00011100xxxxxxxx11101001xxxx", InstName.Stlexb, InstEmit32.Stlexb, OpCode32MemStEx.Create); + SetA32("<<<<00011010xxxxxxxx11101001xxxx", InstName.Stlexd, InstEmit32.Stlexd, OpCode32MemStEx.Create); + SetA32("<<<<00011110xxxxxxxx11101001xxxx", InstName.Stlexh, InstEmit32.Stlexh, OpCode32MemStEx.Create); + SetA32("<<<<00011110xxxx111111001001xxxx", InstName.Stlh, InstEmit32.Stlh, OpCode32MemStEx.Create); + SetA32("<<<<100xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCode32MemMult.Create); + SetA32("<<<<010xx0x0xxxxxxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCode32MemImm.Create); + SetA32("<<<<011xx0x0xxxxxxxxxxxxxxx0xxxx", InstName.Str, InstEmit32.Str, OpCode32MemRsImm.Create); + SetA32("<<<<010xx1x0xxxxxxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCode32MemImm.Create); + SetA32("<<<<011xx1x0xxxxxxxxxxxxxxx0xxxx", InstName.Strb, InstEmit32.Strb, OpCode32MemRsImm.Create); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1111xxxx", InstName.Strd, InstEmit32.Strd, OpCode32MemImm8.Create); + SetA32("<<<<000xx0x0xxxxxxxx00001111xxxx", InstName.Strd, InstEmit32.Strd, OpCode32MemReg.Create); + SetA32("<<<<00011000xxxxxxxx11111001xxxx", InstName.Strex, InstEmit32.Strex, OpCode32MemStEx.Create); + SetA32("<<<<00011100xxxxxxxx11111001xxxx", InstName.Strexb, InstEmit32.Strexb, OpCode32MemStEx.Create); + SetA32("<<<<00011010xxxxxxxx11111001xxxx", InstName.Strexd, InstEmit32.Strexd, OpCode32MemStEx.Create); + SetA32("<<<<00011110xxxxxxxx11111001xxxx", InstName.Strexh, InstEmit32.Strexh, OpCode32MemStEx.Create); + SetA32("<<<<000xx1x0xxxxxxxxxxxx1011xxxx", InstName.Strh, InstEmit32.Strh, OpCode32MemImm8.Create); + SetA32("<<<<000xx0x0xxxxxxxx00001011xxxx", InstName.Strh, InstEmit32.Strh, OpCode32MemReg.Create); + SetA32("<<<<0010010xxxxxxxxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluImm.Create); + SetA32("<<<<0000010xxxxxxxxxxxxxxxx0xxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluRsImm.Create); + SetA32("<<<<0000010xxxxxxxxxxxxx0xx1xxxx", InstName.Sub, InstEmit32.Sub, OpCode32AluRsReg.Create); + SetA32("<<<<1111xxxxxxxxxxxxxxxxxxxxxxxx", 
InstName.Svc, InstEmit32.Svc, OpCode32Exception.Create); + SetA32("<<<<01101010xxxxxxxxxx000111xxxx", InstName.Sxtb, InstEmit32.Sxtb, OpCode32AluUx.Create); + SetA32("<<<<01101000xxxxxxxxxx000111xxxx", InstName.Sxtb16, InstEmit32.Sxtb16, OpCode32AluUx.Create); + SetA32("<<<<01101011xxxxxxxxxx000111xxxx", InstName.Sxth, InstEmit32.Sxth, OpCode32AluUx.Create); + SetA32("<<<<00110011xxxx0000xxxxxxxxxxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluImm.Create); + SetA32("<<<<00010011xxxx0000xxxxxxx0xxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluRsImm.Create); + SetA32("<<<<00010011xxxx0000xxxx0xx1xxxx", InstName.Teq, InstEmit32.Teq, OpCode32AluRsReg.Create); + SetA32("<<<<0111111111111101111011111110", InstName.Trap, InstEmit32.Trap, OpCode32Exception.Create); + SetA32("<<<<0011001000001111000000010010", InstName.Tsb, InstEmit32.Nop, OpCode32.Create); // Trace Synchronization Barrier (FEAT_TRF) + SetA32("<<<<00110001xxxx0000xxxxxxxxxxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluImm.Create); + SetA32("<<<<00010001xxxx0000xxxxxxx0xxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluRsImm.Create); + SetA32("<<<<00010001xxxx0000xxxx0xx1xxxx", InstName.Tst, InstEmit32.Tst, OpCode32AluRsReg.Create); + SetA32("<<<<01100101xxxxxxxx11111001xxxx", InstName.Uadd8, InstEmit32.Uadd8, OpCode32AluReg.Create); + SetA32("<<<<0111111xxxxxxxxxxxxxx101xxxx", InstName.Ubfx, InstEmit32.Ubfx, OpCode32AluBf.Create); + SetA32("<<<<01110011xxxx1111xxxx0001xxxx", InstName.Udiv, InstEmit32.Udiv, OpCode32AluMla.Create); + SetA32("<<<<01100111xxxxxxxx11111001xxxx", InstName.Uhadd8, InstEmit32.Uhadd8, OpCode32AluReg.Create); + SetA32("<<<<01100111xxxxxxxx11111111xxxx", InstName.Uhsub8, InstEmit32.Uhsub8, OpCode32AluReg.Create); + SetA32("<<<<00000100xxxxxxxxxxxx1001xxxx", InstName.Umaal, InstEmit32.Umaal, OpCode32AluUmull.Create); + SetA32("<<<<0000101xxxxxxxxxxxxx1001xxxx", InstName.Umlal, InstEmit32.Umlal, OpCode32AluUmull.Create); + SetA32("<<<<0000100xxxxxxxxxxxxx1001xxxx", InstName.Umull, InstEmit32.Umull, OpCode32AluUmull.Create); + SetA32("<<<<0110111xxxxxxxxxxxxxxx01xxxx", InstName.Usat, InstEmit32.Usat, OpCode32Sat.Create); + SetA32("<<<<01101110xxxxxxxx11110011xxxx", InstName.Usat16, InstEmit32.Usat16, OpCode32Sat16.Create); + SetA32("<<<<01100101xxxxxxxx11111111xxxx", InstName.Usub8, InstEmit32.Usub8, OpCode32AluReg.Create); + SetA32("<<<<01101110xxxxxxxxxx000111xxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCode32AluUx.Create); + SetA32("<<<<01101100xxxxxxxxxx000111xxxx", InstName.Uxtb16, InstEmit32.Uxtb16, OpCode32AluUx.Create); + SetA32("<<<<01101111xxxxxxxxxx000111xxxx", InstName.Uxth, InstEmit32.Uxth, OpCode32AluUx.Create); + SetA32("<<<<0011001000001111000000000010", InstName.Wfe, InstEmit32.Nop, OpCode32.Create); + SetA32("<<<<0011001000001111000000000011", InstName.Wfi, InstEmit32.Nop, OpCode32.Create); + SetA32("<<<<0011001000001111000000000001", InstName.Yield, InstEmit32.Nop, OpCode32.Create); + + // VFP + SetVfp("<<<<11101x110000xxxx101x11x0xxxx", InstName.Vabs, InstEmit32.Vabs_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11100x11xxxxxxxx101xx0x0xxxx", InstName.Vadd, InstEmit32.Vadd_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11101x11010xxxxx101x01x0xxxx", InstName.Vcmp, InstEmit32.Vcmp, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11101x11010xxxxx101x11x0xxxx", InstName.Vcmpe, InstEmit32.Vcmpe, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11101x110111xxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FD, OpCode32SimdS.Create, 
OpCode32SimdS.CreateT32); // FP 32 and 64, scalar. + SetVfp("<<<<11101x11110xxxxx101x11x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // FP32 to int. + SetVfp("<<<<11101x111000xxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_FI, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // Int to FP32. + SetVfp("111111101x1111xxxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_RM, OpCode32SimdCvtFI.Create, OpCode32SimdCvtFI.CreateT32); // The many FP32 to int encodings (fp). + SetVfp("<<<<11101x11001xxxxx101xx1x0xxxx", InstName.Vcvt, InstEmit32.Vcvt_TB, OpCode32SimdCvtTB.Create, OpCode32SimdCvtTB.CreateT32); + SetVfp("<<<<11101x00xxxxxxxx101xx0x0xxxx", InstName.Vdiv, InstEmit32.Vdiv_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11101xx0xxxxxxxx1011x0x10000", InstName.Vdup, InstEmit32.Vdup, OpCode32SimdDupGP.Create, OpCode32SimdDupGP.CreateT32); + SetVfp("<<<<11101x10xxxxxxxx101xx0x0xxxx", InstName.Vfma, InstEmit32.Vfma_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11101x10xxxxxxxx101xx1x0xxxx", InstName.Vfms, InstEmit32.Vfms_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11101x01xxxxxxxx101xx1x0xxxx", InstName.Vfnma, InstEmit32.Vfnma_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11101x01xxxxxxxx101xx0x0xxxx", InstName.Vfnms, InstEmit32.Vfnms_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11001x01xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11001x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11010x11xxxxxxxx1011xxxxxxx0", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11001x01xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11001x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11010x11xxxxxxxx1010xxxxxxxx", InstName.Vldm, InstEmit32.Vldm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<1101xx01xxxxxxxx101xxxxxxxxx", InstName.Vldr, InstEmit32.Vldr, OpCode32SimdMemImm.Create, OpCode32SimdMemImm.CreateT32); + SetVfp("111111101x00xxxxxxxx10>>x0x0xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("111111101x00xxxxxxxx10>>x1x0xxxx", InstName.Vminnm, InstEmit32.Vminnm_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11100x00xxxxxxxx101xx0x0xxxx", InstName.Vmla, InstEmit32.Vmla_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11100x00xxxxxxxx101xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11100xx0xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, OpCode32SimdMovGpElem.Create, OpCode32SimdMovGpElem.CreateT32); // From gen purpose. + SetVfp("<<<<1110xxx1xxxxxxxx1011xxx10000", InstName.Vmov, InstEmit32.Vmov_G1, OpCode32SimdMovGpElem.Create, OpCode32SimdMovGpElem.CreateT32); // To gen purpose. + SetVfp("<<<<1100010xxxxxxxxx101000x1xxxx", InstName.Vmov, InstEmit32.Vmov_G2, OpCode32SimdMovGpDouble.Create, OpCode32SimdMovGpDouble.CreateT32); // To/from gen purpose x2 and single precision x2. 
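
Annotation for readers skimming the diff: every row above pairs an encoding pattern with an instruction name, an emitter, and an opcode-decoder factory. In the pattern strings, `0` and `1` are fixed bits, `x` is a don't-care, and `<`/`>` appear to mark enumerated bits where one combination — every `<` bit set and every `>` bit clear — is reserved and excluded (e.g. cond == 1111 for the `<<<<` condition field on A32 rows, immh == 000 for a `>>>` shift field). A minimal sketch, under that reading, of how one 32-bit row could be compiled into (mask, value) matchers; helper names are hypothetical, not this file's API:

using System;
using System.Collections.Generic;

// Sketch only: compile one row's pattern into (mask, value) matchers.
static class EncodingPatterns
{
    public static IEnumerable<(uint Mask, uint Value)> Compile(string encoding)
    {
        uint value = 0, xMask = 0, blacklisted = 0;
        var varBits = new List<int>();

        for (int i = 0; i < encoding.Length; i++)
        {
            int bit = encoding.Length - 1 - i;

            switch (encoding[i])
            {
                case '1': value |= 1u << bit; break;
                case 'x': xMask |= 1u << bit; break;          // don't-care
                case '>': varBits.Add(bit); break;            // enumerated
                case '<': varBits.Add(bit); blacklisted |= 1u << bit; break;
                case '0': break;
                default: throw new ArgumentException($"Unexpected pattern character '{encoding[i]}'.");
            }
        }

        if (varBits.Count == 0)
        {
            yield return (~xMask, value); // match test: (opCode & Mask) == Value
            yield break;
        }

        for (int combo = 0; combo < (1 << varBits.Count); combo++)
        {
            uint set = 0;

            for (int b = 0; b < varBits.Count; b++)
            {
                set |= (uint)((combo >> b) & 1) << varBits[b];
            }

            if (set != blacklisted) // skip the single reserved combination
            {
                yield return (~xMask, value | set);
            }
        }
    }
}

Read this way, a `<<<<` row expands to 15 matchers (one per condition code except the reserved 1111), and an A64 row like `0>101110<<1…` further up to 7, dropping only the size == 11, Q == 0 combination.
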
+ SetVfp("<<<<1100010xxxxxxxxx101100x1xxxx", InstName.Vmov, InstEmit32.Vmov_GD, OpCode32SimdMovGpDouble.Create, OpCode32SimdMovGpDouble.CreateT32); // To/from gen purpose x2 and double precision. + SetVfp("<<<<1110000xxxxxxxxx1010x0010000", InstName.Vmov, InstEmit32.Vmov_GS, OpCode32SimdMovGp.Create, OpCode32SimdMovGp.CreateT32); // To/from gen purpose and single precision. + SetVfp("<<<<11101x11xxxxxxxx101x0000xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm44.Create, OpCode32SimdImm44.CreateT32); // Scalar f16/32/64 based on size 01 10 11. + SetVfp("<<<<11101x110000xxxx101x01x0xxxx", InstName.Vmov, InstEmit32.Vmov_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11101111xxxxxxxx101000010000", InstName.Vmrs, InstEmit32.Vmrs, OpCode32SimdSpecial.Create, OpCode32SimdSpecial.CreateT32); + SetVfp("<<<<11101110xxxxxxxx101000010000", InstName.Vmsr, InstEmit32.Vmsr, OpCode32SimdSpecial.Create, OpCode32SimdSpecial.CreateT32); + SetVfp("<<<<11100x10xxxxxxxx101xx0x0xxxx", InstName.Vmul, InstEmit32.Vmul_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11101x110001xxxx101x01x0xxxx", InstName.Vneg, InstEmit32.Vneg_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11100x01xxxxxxxx101xx1x0xxxx", InstName.Vnmla, InstEmit32.Vnmla_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11100x01xxxxxxxx101xx0x0xxxx", InstName.Vnmls, InstEmit32.Vnmls_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("<<<<11100x10xxxxxxxx101xx1x0xxxx", InstName.Vnmul, InstEmit32.Vnmul_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + SetVfp("111111101x1110xxxxxx101x01x0xxxx", InstName.Vrint, InstEmit32.Vrint_RM, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11101x110110xxxx101x11x0xxxx", InstName.Vrint, InstEmit32.Vrint_Z, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11101x110111xxxx101x01x0xxxx", InstName.Vrintx, InstEmit32.Vrintx_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("<<<<11101x110001xxxx101x11x0xxxx", InstName.Vsqrt, InstEmit32.Vsqrt_S, OpCode32SimdS.Create, OpCode32SimdS.CreateT32); + SetVfp("111111100xxxxxxxxxxx101xx0x0xxxx", InstName.Vsel, InstEmit32.Vsel, OpCode32SimdSel.Create, OpCode32SimdSel.CreateT32); + SetVfp("<<<<11001x00xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11001x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11010x10xxxxxxxx1011xxxxxxx0", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11001x00xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11001x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<11010x10xxxxxxxx1010xxxxxxxx", InstName.Vstm, InstEmit32.Vstm, OpCode32SimdMemMult.Create, OpCode32SimdMemMult.CreateT32); + SetVfp("<<<<1101xx00xxxxxxxx101xxxxxxxxx", InstName.Vstr, InstEmit32.Vstr, OpCode32SimdMemImm.Create, OpCode32SimdMemImm.CreateT32); + SetVfp("<<<<11100x11xxxxxxxx101xx1x0xxxx", InstName.Vsub, InstEmit32.Vsub_S, OpCode32SimdRegS.Create, OpCode32SimdRegS.CreateT32); + + // ASIMD + SetAsimd("111100111x110000xxx0001101x0xxx0", InstName.Aesd_V, InstEmit32.Aesd_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); + SetAsimd("111100111x110000xxx0001100x0xxx0", 
InstName.Aese_V, InstEmit32.Aese_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); + SetAsimd("111100111x110000xxx0001111x0xxx0", InstName.Aesimc_V, InstEmit32.Aesimc_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); + SetAsimd("111100111x110000xxx0001110x0xxx0", InstName.Aesmc_V, InstEmit32.Aesmc_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); + SetAsimd("111100110x00xxx0xxx01100x1x0xxx0", InstName.Sha256h_V, InstEmit32.Sha256h_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x01xxx0xxx01100x1x0xxx0", InstName.Sha256h2_V, InstEmit32.Sha256h2_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x111010xxx0001111x0xxx0", InstName.Sha256su0_V, InstEmit32.Sha256su0_V, OpCode32Simd.Create, OpCode32Simd.CreateT32); + SetAsimd("111100110x10xxx0xxx01100x1x0xxx0", InstName.Sha256su1_V, InstEmit32.Sha256su1_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x0x<<xxxxxxxx0111xxx0xxxx", InstName.Vabd, InstEmit32.Vabd_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x<<xxxxxxxx0111x0x0xxxx", InstName.Vabdl, InstEmit32.Vabdl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); + SetAsimd("111100111x11<<01xxxx00110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x111001xxxx01110xx0xxxx", InstName.Vabs, InstEmit32.Vabs_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100100xxxxxxxxxxx1000xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x00xxxxxxxx1101xxx0xxxx", InstName.Vadd, InstEmit32.Vadd_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x<<xxxxxxx00000x0x0xxxx", InstName.Vaddl, InstEmit32.Vaddl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); + SetAsimd("1111001x1x<<xxxxxxx00001x0x0xxxx", InstName.Vaddw, InstEmit32.Vaddw_I, OpCode32SimdRegWide.Create, OpCode32SimdRegWide.CreateT32); + SetAsimd("111100100x00xxxxxxxx0001xxx1xxxx", InstName.Vand, InstEmit32.Vand_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("111100100x01xxxxxxxx0001xxx1xxxx", InstName.Vbic, InstEmit32.Vbic_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("1111001x1x000xxxxxxx<<x10x11xxxx", InstName.Vbic, InstEmit32.Vbic_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); + SetAsimd("111100110x11xxxxxxxx0001xxx1xxxx", InstName.Vbif, InstEmit32.Vbif, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("111100110x10xxxxxxxx0001xxx1xxxx", InstName.Vbit, InstEmit32.Vbit, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("111100110x01xxxxxxxx0001xxx1xxxx", InstName.Vbsl, InstEmit32.Vbsl, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("111100110x<<xxxxxxxx1000xxx1xxxx", InstName.Vceq, InstEmit32.Vceq_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x00xxxxxxxx1110xxx0xxxx", InstName.Vceq, InstEmit32.Vceq_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x11xx01xxxx0x010xx0xxxx", InstName.Vceq, InstEmit32.Vceq_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("1111001x0x<<xxxxxxxx0011xxx1xxxx", InstName.Vcge, InstEmit32.Vcge_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x00xxxxxxxx1110xxx0xxxx", InstName.Vcge, InstEmit32.Vcge_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x11xx01xxxx0x001xx0xxxx", 
InstName.Vcge, InstEmit32.Vcge_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("1111001x0x<<xxxxxxxx0011xxx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x10xxxxxxxx1110xxx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x11xx01xxxx0x000xx0xxxx", InstName.Vcgt, InstEmit32.Vcgt_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x11xx01xxxx0x011xx0xxxx", InstName.Vcle, InstEmit32.Vcle_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x11xx01xxxx0x100xx0xxxx", InstName.Vclt, InstEmit32.Vclt_Z, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x110000xxxx01010xx0xxxx", InstName.Vcnt, InstEmit32.Vcnt, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x111011xxxx011xxxx0xxxx", InstName.Vcvt, InstEmit32.Vcvt_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); // FP and integer, vector. + SetAsimd("111100111x11xxxxxxxx11000xx0xxxx", InstName.Vdup, InstEmit32.Vdup_1, OpCode32SimdDupElem.Create, OpCode32SimdDupElem.CreateT32); + SetAsimd("111100110x00xxxxxxxx0001xxx1xxxx", InstName.Veor, InstEmit32.Veor_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("111100101x11xxxxxxxxxxxxxxx0xxxx", InstName.Vext, InstEmit32.Vext, OpCode32SimdExt.Create, OpCode32SimdExt.CreateT32); + SetAsimd("111100100x00xxxxxxxx1100xxx1xxxx", InstName.Vfma, InstEmit32.Vfma_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x10xxxxxxxx1100xxx1xxxx", InstName.Vfms, InstEmit32.Vfms_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x0x<<xxxxxxxx0000xxx0xxxx", InstName.Vhadd, InstEmit32.Vhadd, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111101001x10xxxxxxxx0000xxx0xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx0100xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx1000x000xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx1000x011xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx110000x0xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx110001xxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx110010xxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x10xxxxxxxx0111xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1. + SetAsimd("111101000x10xxxxxxxx1010xx<<xxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 2. + SetAsimd("111101000x10xxxxxxxx0110xx0xxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 3. + SetAsimd("111101000x10xxxxxxxx0010xxxxxxxx", InstName.Vld1, InstEmit32.Vld1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 4. 
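
With rows compiled to matchers, decode itself reduces to a masked compare against the fetched word. A sketch of that lookup, assuming mutually exclusive rows; a production decoder would presumably also bucket entries by a few high bits rather than scan, and the names here are illustrative:

using System.Collections.Generic;

// Illustrative lookup over compiled (mask, value) matchers.
sealed class TableDecoder
{
    public readonly record struct Entry(uint Mask, uint Value, string Name);

    private readonly List<Entry> _entries = new();

    public void Add(uint mask, uint value, string name) =>
        _entries.Add(new Entry(mask, value, name));

    public string? Decode(uint opCode)
    {
        foreach (Entry e in _entries)
        {
            // A row matches when every fixed bit agrees with the word.
            if ((opCode & e.Mask) == e.Value)
            {
                return e.Name; // rows assumed disjoint, so first match wins
            }
        }

        return null; // no row claims the word: undefined/unallocated encoding
    }
}
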
+ SetAsimd("111101001x10xxxxxxxx0x01xxxxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx1001xx0xxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx1101<<xxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x10xxxxxxxx100x<<0xxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1, inc = 1/2 (itype). + SetAsimd("111101000x10xxxxxxxx100x<<10xxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1, inc = 1/2 (itype). + SetAsimd("111101000x10xxxxxxxx0011<<xxxxxx", InstName.Vld2, InstEmit32.Vld2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 2, inc = 2. + SetAsimd("111101001x10xxxxxxxx0x10xxx0xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx1010xx00xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx1110<<x0xxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x10xxxxxxxx010x<<0xxxxx", InstName.Vld3, InstEmit32.Vld3, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Inc = 1/2 (itype). + SetAsimd("111101001x10xxxxxxxx0x11xxxxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx1011xx<<xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x10xxxxxxxx1111<<x>xxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x10xxxxxxxx000x<<xxxxxx", InstName.Vld4, InstEmit32.Vld4, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Inc = 1/2 (itype). 
+ SetAsimd("1111001x0x<<xxxxxxxx0110xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x00xxxxxxxx1111xxx0xxxx", InstName.Vmax, InstEmit32.Vmax_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x0x<<xxxxxxxx0110xxx1xxxx", InstName.Vmin, InstEmit32.Vmin_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x10xxxxxxxx1111xxx0xxxx", InstName.Vmin, InstEmit32.Vmin_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x0xxxxxxxxx1111xxx1xxxx", InstName.Vmaxnm, InstEmit32.Vmaxnm_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x1xxxxxxxxx1111xxx1xxxx", InstName.Vminnm, InstEmit32.Vminnm_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x<<xxxxxxxx000xx1x0xxxx", InstName.Vmla, InstEmit32.Vmla_1, OpCode32SimdRegElem.Create, OpCode32SimdRegElem.CreateT32); + SetAsimd("111100100xxxxxxxxxxx1001xxx0xxxx", InstName.Vmla, InstEmit32.Vmla_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x00xxxxxxxx1101xxx1xxxx", InstName.Vmla, InstEmit32.Vmla_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x<<xxxxxxx01000x0x0xxxx", InstName.Vmlal, InstEmit32.Vmlal_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); + SetAsimd("1111001x1x<<xxxxxxxx010xx1x0xxxx", InstName.Vmls, InstEmit32.Vmls_1, OpCode32SimdRegElem.Create, OpCode32SimdRegElem.CreateT32); + SetAsimd("111100100x10xxxxxxxx1101xxx1xxxx", InstName.Vmls, InstEmit32.Vmls_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110xxxxxxxxxxx1001xxx0xxxx", InstName.Vmls, InstEmit32.Vmls_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x<<xxxxxxx01010x0x0xxxx", InstName.Vmlsl, InstEmit32.Vmlsl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); + SetAsimd("1111001x1x000xxxxxxx0xx00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q vector I32. + SetAsimd("1111001x1x000xxxxxxx10x00x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q I16. + SetAsimd("1111001x1x000xxxxxxx11xx0x01xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q (dt - from cmode). + SetAsimd("1111001x1x000xxxxxxx11100x11xxxx", InstName.Vmov, InstEmit32.Vmov_I, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q I64. 
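
The four Vmov immediate rows just above differ only in fixed cmode/op bits, so each contributes its own small matcher family rather than one mega-pattern. A quick check of the reading sketched earlier, reusing the hypothetical EncodingPatterns.Compile:

using System;
using System.Linq;

static class PatternCount
{
    static void Main()
    {
        // Vmov D/Q I64 row above: only '0'/'1'/'x', so exactly one matcher.
        string fixedRow = "1111001x1x000xxxxxxx11100x11xxxx";

        // A32 Mov immediate row further up: a '<<<<' condition field,
        // so 15 matchers (every cond except the reserved 1111).
        string condRow = "<<<<0011101x0000xxxxxxxxxxxxxxxx";

        Console.WriteLine(EncodingPatterns.Compile(fixedRow).Count()); // 1
        Console.WriteLine(EncodingPatterns.Compile(condRow).Count());  // 15
    }
}
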
+ SetAsimd("1111001x1x001000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, OpCode32SimdLong.Create, OpCode32SimdLong.CreateT32); + SetAsimd("1111001x1x010000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, OpCode32SimdLong.Create, OpCode32SimdLong.CreateT32); + SetAsimd("1111001x1x100000xxx0101000x1xxxx", InstName.Vmovl, InstEmit32.Vmovl, OpCode32SimdLong.Create, OpCode32SimdLong.CreateT32); + SetAsimd("111100111x11<<10xxxx001000x0xxx0", InstName.Vmovn, InstEmit32.Vmovn, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); + SetAsimd("1111001x1x<<xxxxxxxx100xx1x0xxxx", InstName.Vmul, InstEmit32.Vmul_1, OpCode32SimdRegElem.Create, OpCode32SimdRegElem.CreateT32); + SetAsimd("111100100x<<xxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x00xxxxxxxx1001xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x00xxxxxxxx1101xxx1xxxx", InstName.Vmul, InstEmit32.Vmul_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x<<xxxxxxx01010x1x0xxxx", InstName.Vmull, InstEmit32.Vmull_1, OpCode32SimdRegElemLong.Create, OpCode32SimdRegElemLong.CreateT32); + SetAsimd("1111001x1x<<xxxxxxx01100x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); + SetAsimd("111100101xx0xxxxxxx01110x0x0xxxx", InstName.Vmull, InstEmit32.Vmull_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); // P8/P64 + SetAsimd("111100111x110000xxxx01011xx0xxxx", InstName.Vmvn, InstEmit32.Vmvn_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("1111001x1x000xxxxxxx0xx00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); // D/Q vector I32. 
+ SetAsimd("1111001x1x000xxxxxxx10x00x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); + SetAsimd("1111001x1x000xxxxxxx110x0x11xxxx", InstName.Vmvn, InstEmit32.Vmvn_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); + SetAsimd("111100111x11<<01xxxx00111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x111001xxxx01111xx0xxxx", InstName.Vneg, InstEmit32.Vneg_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100100x11xxxxxxxx0001xxx1xxxx", InstName.Vorn, InstEmit32.Vorn_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("111100100x10xxxxxxxx0001xxx1xxxx", InstName.Vorr, InstEmit32.Vorr_I, OpCode32SimdBinary.Create, OpCode32SimdBinary.CreateT32); + SetAsimd("1111001x1x000xxxxxxx<<x10x01xxxx", InstName.Vorr, InstEmit32.Vorr_II, OpCode32SimdImm.Create, OpCode32SimdImm.CreateT32); + SetAsimd("111100100x<<xxxxxxxx1011x0x1xxxx", InstName.Vpadd, InstEmit32.Vpadd_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x00xxxxxxxx1101x0x0xxxx", InstName.Vpadd, InstEmit32.Vpadd_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x11<<00xxxx0010xxx0xxxx", InstName.Vpaddl, InstEmit32.Vpaddl, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("1111001x0x<<xxxxxxxx1010x0x0xxxx", InstName.Vpmax, InstEmit32.Vpmax_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x00xxxxxxxx1111x0x0xxxx", InstName.Vpmax, InstEmit32.Vpmax_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x0x<<xxxxxxxx1010x0x1xxxx", InstName.Vpmin, InstEmit32.Vpmin_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100110x10xxxxxxxx1111x0x0xxxx", InstName.Vpmin, InstEmit32.Vpmin_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x0xxxxxxxxxxx0000xxx1xxxx", InstName.Vqadd, InstEmit32.Vqadd, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x01xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh, InstEmit32.Vqdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x10xxxxxxxx1011xxx0xxxx", InstName.Vqdmulh, InstEmit32.Vqdmulh, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x11<<10xxxx00101xx0xxx0", InstName.Vqmovn, InstEmit32.Vqmovn, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); + SetAsimd("111100111x11<<10xxxx001001x0xxx0", InstName.Vqmovun, InstEmit32.Vqmovun, OpCode32SimdMovn.Create, OpCode32SimdMovn.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx100101x1xxx0", InstName.Vqrshrn, InstEmit32.Vqrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("111100111x>>>xxxxxxx100001x1xxx0", InstName.Vqrshrun, InstEmit32.Vqrshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx100100x1xxx0", InstName.Vqshrn, InstEmit32.Vqshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("111100111x>>>xxxxxxx100000x1xxx0", InstName.Vqshrun, InstEmit32.Vqshrun, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("1111001x0xxxxxxxxxxx0010xxx1xxxx", InstName.Vqsub, InstEmit32.Vqsub, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x111011xxxx010x0xx0xxxx", InstName.Vrecpe, InstEmit32.Vrecpe, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32); + SetAsimd("111100100x00xxxxxxxx1111xxx1xxxx", InstName.Vrecps, InstEmit32.Vrecps, 
OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x11xx00xxxx000<<xx0xxxx", InstName.Vrev, InstEmit32.Vrev, OpCode32SimdRev.Create, OpCode32SimdRev.CreateT32); + SetAsimd("1111001x0x<<xxxxxxxx0001xxx0xxxx", InstName.Vrhadd, InstEmit32.Vrhadd, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x111010xxxx01010xx0xxxx", InstName.Vrinta, InstEmit32.Vrinta_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x111010xxxx01101xx0xxxx", InstName.Vrintm, InstEmit32.Vrintm_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x111010xxxx01000xx0xxxx", InstName.Vrintn, InstEmit32.Vrintn_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x111010xxxx01111xx0xxxx", InstName.Vrintp, InstEmit32.Vrintp_V, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx0010>xx1xxxx", InstName.Vrshr, InstEmit32.Vrshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("111100101x>>>xxxxxxx100001x1xxx0", InstName.Vrshrn, InstEmit32.Vrshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("111100111x111011xxxx010x1xx0xxxx", InstName.Vrsqrte, InstEmit32.Vrsqrte, OpCode32SimdSqrte.Create, OpCode32SimdSqrte.CreateT32); + SetAsimd("111100100x10xxxxxxxx1111xxx1xxxx", InstName.Vrsqrts, InstEmit32.Vrsqrts, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx0011>xx1xxxx", InstName.Vrsra, InstEmit32.Vrsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("111100101x>>>xxxxxxx0101>xx1xxxx", InstName.Vshl, InstEmit32.Vshl, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("1111001x0xxxxxxxxxxx0100xxx0xxxx", InstName.Vshl, InstEmit32.Vshl_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx101000x1xxxx", InstName.Vshll, InstEmit32.Vshll, OpCode32SimdShImmLong.Create, OpCode32SimdShImmLong.CreateT32); // A1 encoding. + SetAsimd("1111001x1x>>>xxxxxxx0000>xx1xxxx", InstName.Vshr, InstEmit32.Vshr, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("111100101x>>>xxxxxxx100000x1xxx0", InstName.Vshrn, InstEmit32.Vshrn, OpCode32SimdShImmNarrow.Create, OpCode32SimdShImmNarrow.CreateT32); + SetAsimd("1111001x1x>>>xxxxxxx0001>xx1xxxx", InstName.Vsra, InstEmit32.Vsra, OpCode32SimdShImm.Create, OpCode32SimdShImm.CreateT32); + SetAsimd("111101001x00xxxxxxxx0000xxx0xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx0100xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx1000x000xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx1000x011xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x00xxxxxxxx0111xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1. + SetAsimd("111101000x00xxxxxxxx1010xx<<xxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 2. + SetAsimd("111101000x00xxxxxxxx0110xx0xxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 3. 
+ SetAsimd("111101000x00xxxxxxxx0010xxxxxxxx", InstName.Vst1, InstEmit32.Vst1, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 4. + SetAsimd("111101001x00xxxxxxxx0x01xxxxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx1001xx0xxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x00xxxxxxxx100x<<0xxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1, inc = 1/2 (itype). + SetAsimd("111101000x00xxxxxxxx100x<<10xxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 1, inc = 1/2 (itype). + SetAsimd("111101000x00xxxxxxxx0011<<xxxxxx", InstName.Vst2, InstEmit32.Vst2, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Regs = 2, inc = 2. + SetAsimd("111101001x00xxxxxxxx0x10xxx0xxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx1010xx00xxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x00xxxxxxxx010x<<0xxxxx", InstName.Vst3, InstEmit32.Vst3, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Inc = 1/2 (itype). + SetAsimd("111101001x00xxxxxxxx0x11xxxxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101001x00xxxxxxxx1011xx<<xxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemSingle.Create, OpCode32SimdMemSingle.CreateT32); + SetAsimd("111101000x00xxxxxxxx000x<<xxxxxx", InstName.Vst4, InstEmit32.Vst4, OpCode32SimdMemPair.Create, OpCode32SimdMemPair.CreateT32); // Inc = 1/2 (itype). 
+ SetAsimd("111100110xxxxxxxxxxx1000xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_I, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100100x10xxxxxxxx1101xxx0xxxx", InstName.Vsub, InstEmit32.Vsub_V, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("1111001x1x<<xxxxxxx00010x0x0xxxx", InstName.Vsubl, InstEmit32.Vsubl_I, OpCode32SimdRegLong.Create, OpCode32SimdRegLong.CreateT32); + SetAsimd("1111001x1x<<xxxxxxx00011x0x0xxxx", InstName.Vsubw, InstEmit32.Vsubw_I, OpCode32SimdRegWide.Create, OpCode32SimdRegWide.CreateT32); + SetAsimd("111100111x11xxxxxxxx10xxxxx0xxxx", InstName.Vtbl, InstEmit32.Vtbl, OpCode32SimdTbl.Create, OpCode32SimdTbl.CreateT32); + SetAsimd("111100111x11<<10xxxx00001xx0xxxx", InstName.Vtrn, InstEmit32.Vtrn, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100100x<<xxxxxxxx1000xxx1xxxx", InstName.Vtst, InstEmit32.Vtst, OpCode32SimdReg.Create, OpCode32SimdReg.CreateT32); + SetAsimd("111100111x11<<10xxxx00010xx0xxxx", InstName.Vuzp, InstEmit32.Vuzp, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); + SetAsimd("111100111x11<<10xxxx00011xx0xxxx", InstName.Vzip, InstEmit32.Vzip, OpCode32SimdCmpZ.Create, OpCode32SimdCmpZ.CreateT32); +#endregion + +#region "OpCode Table (AArch32, T16)" + SetT16("000<<xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftImm.Create); + SetT16("0001100xxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AddSubReg.Create); + SetT16("0001101xxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT16AddSubReg.Create); + SetT16("0001110xxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AddSubImm3.Create); + SetT16("0001111xxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT16AddSubImm3.Create); + SetT16("00100xxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16AluImm8.Create); + SetT16("00101xxxxxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT16AluImm8.Create); + SetT16("00110xxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AluImm8.Create); + SetT16("00111xxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT16AluImm8.Create); + SetT16("0100000000xxxxxx", InstName.And, InstEmit32.And, OpCodeT16AluRegLow.Create); + SetT16("0100000001xxxxxx", InstName.Eor, InstEmit32.Eor, OpCodeT16AluRegLow.Create); + SetT16("0100000010xxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftReg.Create); + SetT16("0100000011xxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftReg.Create); + SetT16("0100000100xxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftReg.Create); + SetT16("0100000101xxxxxx", InstName.Adc, InstEmit32.Adc, OpCodeT16AluRegLow.Create); + SetT16("0100000110xxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCodeT16AluRegLow.Create); + SetT16("0100000111xxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16ShiftReg.Create); + SetT16("0100001000xxxxxx", InstName.Tst, InstEmit32.Tst, OpCodeT16AluRegLow.Create); + SetT16("0100001001xxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCodeT16AluImmZero.Create); + SetT16("0100001010xxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT16AluRegLow.Create); + SetT16("0100001011xxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCodeT16AluRegLow.Create); + SetT16("0100001100xxxxxx", InstName.Orr, InstEmit32.Orr, OpCodeT16AluRegLow.Create); + SetT16("0100001101xxxxxx", InstName.Mul, InstEmit32.Mul, OpCodeT16AluRegLow.Create); + SetT16("0100001110xxxxxx", InstName.Bic, InstEmit32.Bic, OpCodeT16AluRegLow.Create); + SetT16("0100001111xxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCodeT16AluRegLow.Create); + SetT16("01000100xxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AluRegHigh.Create); + SetT16("01000101xxxxxxxx", 
InstName.Cmp, InstEmit32.Cmp, OpCodeT16AluRegHigh.Create); + SetT16("01000110xxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT16AluRegHigh.Create); + SetT16("010001110xxxx000", InstName.Bx, InstEmit32.Bx, OpCodeT16BReg.Create); + SetT16("010001111xxxx000", InstName.Blx, InstEmit32.Blxr, OpCodeT16BReg.Create); + SetT16("01001xxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT16MemLit.Create); + SetT16("0101000xxxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT16MemReg.Create); + SetT16("0101001xxxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT16MemReg.Create); + SetT16("0101010xxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT16MemReg.Create); + SetT16("0101011xxxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT16MemReg.Create); + SetT16("0101100xxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT16MemReg.Create); + SetT16("0101101xxxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT16MemReg.Create); + SetT16("0101110xxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT16MemReg.Create); + SetT16("0101111xxxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT16MemReg.Create); + SetT16("01100xxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT16MemImm5.Create); + SetT16("01101xxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT16MemImm5.Create); + SetT16("01110xxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT16MemImm5.Create); + SetT16("01111xxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT16MemImm5.Create); + SetT16("10000xxxxxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT16MemImm5.Create); + SetT16("10001xxxxxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT16MemImm5.Create); + SetT16("10010xxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT16MemSp.Create); + SetT16("10011xxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT16MemSp.Create); + SetT16("10100xxxxxxxxxxx", InstName.Adr, InstEmit32.Adr, OpCodeT16Adr.Create); + SetT16("10101xxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16SpRel.Create); + SetT16("101100000xxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT16AddSubSp.Create); + SetT16("101100001xxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT16AddSubSp.Create); + SetT16("1011001000xxxxxx", InstName.Sxth, InstEmit32.Sxth, OpCodeT16AluUx.Create); + SetT16("1011001001xxxxxx", InstName.Sxtb, InstEmit32.Sxtb, OpCodeT16AluUx.Create); + SetT16("1011001010xxxxxx", InstName.Uxth, InstEmit32.Uxth, OpCodeT16AluUx.Create); + SetT16("1011001011xxxxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCodeT16AluUx.Create); + SetT16("101100x1xxxxxxxx", InstName.Cbz, InstEmit32.Cbz, OpCodeT16BImmCmp.Create); + SetT16("1011010xxxxxxxxx", InstName.Push, InstEmit32.Stm, OpCodeT16MemStack.Create); + SetT16("1011101000xxxxxx", InstName.Rev, InstEmit32.Rev, OpCodeT16AluRegLow.Create); + SetT16("1011101001xxxxxx", InstName.Rev16, InstEmit32.Rev16, OpCodeT16AluRegLow.Create); + SetT16("1011101011xxxxxx", InstName.Revsh, InstEmit32.Revsh, OpCodeT16AluRegLow.Create); + SetT16("101110x1xxxxxxxx", InstName.Cbnz, InstEmit32.Cbnz, OpCodeT16BImmCmp.Create); + SetT16("1011110xxxxxxxxx", InstName.Pop, InstEmit32.Ldm, OpCodeT16MemStack.Create); + SetT16("1011111100000000", InstName.Nop, InstEmit32.Nop, OpCodeT16.Create); + SetT16("1011111100010000", InstName.Yield, InstEmit32.Nop, OpCodeT16.Create); + SetT16("1011111100100000", InstName.Wfe, InstEmit32.Nop, OpCodeT16.Create); + SetT16("1011111100110000", InstName.Wfi, InstEmit32.Nop, OpCodeT16.Create); + SetT16("1011111101000000", InstName.Sev, InstEmit32.Nop, OpCodeT16.Create); + SetT16("1011111101010000", InstName.Sevl, InstEmit32.Nop, OpCodeT16.Create); + 
SetT16("10111111011x0000", InstName.Hint, InstEmit32.Nop, OpCodeT16.Create); // Hint instruction + SetT16("101111111xxx0000", InstName.Hint, InstEmit32.Nop, OpCodeT16.Create); // Hint instruction + SetT16("10111111xxxx>>>>", InstName.It, InstEmit32.It, OpCodeT16IfThen.Create); + SetT16("11000xxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCodeT16MemMult.Create); + SetT16("11001xxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCodeT16MemMult.Create); + SetT16("1101<<<xxxxxxxxx", InstName.B, InstEmit32.B, OpCodeT16BImm8.Create); + SetT16("11011111xxxxxxxx", InstName.Svc, InstEmit32.Svc, OpCodeT16Exception.Create); + SetT16("11100xxxxxxxxxxx", InstName.B, InstEmit32.B, OpCodeT16BImm11.Create); +#endregion + +#region "OpCode Table (AArch32, T32)" + // Base + SetT32("11101011010xxxxx0xxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, OpCodeT32AluRsImm.Create); + SetT32("11110x01010xxxxx0xxxxxxxxxxxxxxx", InstName.Adc, InstEmit32.Adc, OpCodeT32AluImm.Create); + SetT32("11101011000<xxxx0xxx<<<<xxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT32AluRsImm.Create); + SetT32("11110x01000<xxxx0xxx<<<<xxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT32AluImm.Create); + SetT32("11110x100000xxxx0xxxxxxxxxxxxxxx", InstName.Add, InstEmit32.Add, OpCodeT32AluImm12.Create); + SetT32("11101010000<xxxx0xxx<<<<xxxxxxxx", InstName.And, InstEmit32.And, OpCodeT32AluRsImm.Create); + SetT32("11110x00000<xxxx0xxx<<<<xxxxxxxx", InstName.And, InstEmit32.And, OpCodeT32AluImm.Create); + SetT32("11110x<<<xxxxxxx10x0xxxxxxxxxxxx", InstName.B, InstEmit32.B, OpCodeT32BImm20.Create); + SetT32("11110xxxxxxxxxxx10x1xxxxxxxxxxxx", InstName.B, InstEmit32.B, OpCodeT32BImm24.Create); + SetT32("11110011011011110xxxxxxxxx0xxxxx", InstName.Bfc, InstEmit32.Bfc, OpCodeT32AluBf.Create); + SetT32("111100110110<<<<0xxxxxxxxx0xxxxx", InstName.Bfi, InstEmit32.Bfi, OpCodeT32AluBf.Create); + SetT32("11101010001xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, OpCodeT32AluRsImm.Create); + SetT32("11110x00001xxxxx0xxxxxxxxxxxxxxx", InstName.Bic, InstEmit32.Bic, OpCodeT32AluImm.Create); + SetT32("11110xxxxxxxxxxx11x1xxxxxxxxxxxx", InstName.Bl, InstEmit32.Bl, OpCodeT32BImm24.Create); + SetT32("11110xxxxxxxxxxx11x0xxxxxxxxxxx0", InstName.Blx, InstEmit32.Blx, OpCodeT32BImm24.Create); + SetT32("111110101011xxxx1111xxxx1000xxxx", InstName.Clz, InstEmit32.Clz, OpCodeT32AluReg.Create); + SetT32("111010110001xxxx0xxx1111xxxxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCodeT32AluRsImm.Create); + SetT32("11110x010001xxxx0xxx1111xxxxxxxx", InstName.Cmn, InstEmit32.Cmn, OpCodeT32AluImm.Create); + SetT32("111010111011xxxx0xxx1111xxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT32AluRsImm.Create); + SetT32("11110x011011xxxx0xxx1111xxxxxxxx", InstName.Cmp, InstEmit32.Cmp, OpCodeT32AluImm.Create); + SetT32("11110011101011111000000000010100", InstName.Csdb, InstEmit32.Csdb, OpCodeT32.Create); + SetT32("11101010100<xxxx0xxx<<<<xxxxxxxx", InstName.Eor, InstEmit32.Eor, OpCodeT32AluRsImm.Create); + SetT32("11110x00100<xxxx0xxx<<<<xxxxxxxx", InstName.Eor, InstEmit32.Eor, OpCodeT32AluImm.Create); + SetT32("11110011101011111000000000010000", InstName.Esb, InstEmit32.Nop, OpCodeT32.Create); // Error Synchronization Barrier (FEAT_RAS) + SetT32("1111001110101111100000000000011x", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("11110011101011111000000000001xxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("11110011101011111000000000010001", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + 
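
The T16 rows above use 16-bit patterns, while the T32 rows beginning here are written as 32 bits, first halfword first. A Thumb-2 instruction is stored as two little-endian halfwords, so a plain 32-bit read leaves the first halfword in the low bits; something has to be halfword-swapped before matching — either the pattern at registration time or the fetched word at decode time. A sketch under that assumption, with hypothetical helper names:

// Illustrative halfword fix-up for first-halfword-first T32 patterns.
static class Thumb32
{
    // Swap the two 16-character halves of a 32-bit pattern string.
    public static string SwapPatternHalfwords(string encoding) =>
        encoding.Substring(16) + encoding.Substring(0, 16);

    // Or, equivalently, rotate the fetched word by 16 bits instead.
    public static uint SwapOpcodeHalfwords(uint opCode) =>
        (opCode >> 16) | (opCode << 16);
}
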
SetT32("11110011101011111000000000010011", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("11110011101011111000000000010101", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("1111001110101111100000000001011x", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("11110011101011111000000000011xxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("111100111010111110000000001xxxxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("11110011101011111000000001xxxxxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("1111001110101111100000001xxxxxxx", InstName.Hint, InstEmit32.Nop, OpCodeT32.Create); // Reserved Hint + SetT32("111010001101xxxxxxxx111110101111", InstName.Lda, InstEmit32.Lda, OpCodeT32MemLdEx.Create); + SetT32("111010001101xxxxxxxx111110001111", InstName.Ldab, InstEmit32.Ldab, OpCodeT32MemLdEx.Create); + SetT32("111010001101xxxxxxxx111111101111", InstName.Ldaex, InstEmit32.Ldaex, OpCodeT32MemLdEx.Create); + SetT32("111010001101xxxxxxxx111111001111", InstName.Ldaexb, InstEmit32.Ldaexb, OpCodeT32MemLdEx.Create); + SetT32("111010001101xxxxxxxxxxxx11111111", InstName.Ldaexd, InstEmit32.Ldaexd, OpCodeT32MemLdEx.Create); + SetT32("111010001101xxxxxxxx111111011111", InstName.Ldaexh, InstEmit32.Ldaexh, OpCodeT32MemLdEx.Create); + SetT32("111010001101xxxxxxxx111110011111", InstName.Ldah, InstEmit32.Ldah, OpCodeT32MemLdEx.Create); + SetT32("1110100010x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCodeT32MemMult.Create); + SetT32("1110100100x1xxxxxxxxxxxxxxxxxxxx", InstName.Ldm, InstEmit32.Ldm, OpCodeT32MemMult.Create); + SetT32("111110000101xxxxxxxx10x1xxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemImm8.Create); + SetT32("111110000101xxxxxxxx1100xxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemImm8.Create); + SetT32("111110000101xxxxxxxx11x1xxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemImm8.Create); + SetT32("111110001101xxxxxxxxxxxxxxxxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemImm12.Create); + SetT32("111110000101<<<<xxxx000000xxxxxx", InstName.Ldr, InstEmit32.Ldr, OpCodeT32MemRsImm.Create); + SetT32("111110000001xxxxxxxx10x1xxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemImm8.Create); + SetT32("111110000001xxxx<<<<1100xxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemImm8.Create); + SetT32("111110000001xxxxxxxx11x1xxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemImm8.Create); + SetT32("111110001001xxxx<<<<xxxxxxxxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemImm12.Create); + SetT32("111110000001xxxx<<<<000000xxxxxx", InstName.Ldrb, InstEmit32.Ldrb, OpCodeT32MemRsImm.Create); + SetT32("11101000x111<<<<xxxxxxxxxxxxxxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCodeT32MemImm8D.Create); + SetT32("11101001x1x1<<<<xxxxxxxxxxxxxxxx", InstName.Ldrd, InstEmit32.Ldrd, OpCodeT32MemImm8D.Create); + SetT32("111110000011xxxxxxxx10x1xxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemImm8.Create); + SetT32("111110000011xxxx<<<<1100xxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemImm8.Create); + SetT32("111110000011xxxxxxxx11x1xxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemImm8.Create); + SetT32("111110001011xxxx<<<<xxxxxxxxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemImm12.Create); + SetT32("111110000011xxxx<<<<000000xxxxxx", InstName.Ldrh, InstEmit32.Ldrh, OpCodeT32MemRsImm.Create); + SetT32("111110010001xxxxxxxx10x1xxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, 
OpCodeT32MemImm8.Create); + SetT32("111110010001xxxx<<<<1100xxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemImm8.Create); + SetT32("111110010001xxxxxxxx11x1xxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemImm8.Create); + SetT32("111110011001xxxx<<<<xxxxxxxxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemImm12.Create); + SetT32("111110010001xxxx<<<<000000xxxxxx", InstName.Ldrsb, InstEmit32.Ldrsb, OpCodeT32MemRsImm.Create); + SetT32("111110010011xxxxxxxx10x1xxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemImm8.Create); + SetT32("111110010011xxxx<<<<1100xxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemImm8.Create); + SetT32("111110010011xxxxxxxx11x1xxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemImm8.Create); + SetT32("111110011011xxxx<<<<xxxxxxxxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemImm12.Create); + SetT32("111110010011xxxx<<<<000000xxxxxx", InstName.Ldrsh, InstEmit32.Ldrsh, OpCodeT32MemRsImm.Create); + SetT32("111110110000xxxx<<<<xxxx0000xxxx", InstName.Mla, InstEmit32.Mla, OpCodeT32AluMla.Create); + SetT32("111110110000xxxxxxxxxxxx0001xxxx", InstName.Mls, InstEmit32.Mls, OpCodeT32AluMla.Create); + SetT32("11101010010x11110xxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT32AluRsImm.Create); + SetT32("111110100xxxxxxx1111xxxx0000xxxx", InstName.Mov, InstEmit32.Mov, OpCodeT32ShiftReg.Create); + SetT32("11110x00010x11110xxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT32AluImm.Create); + SetT32("11110x100100xxxx0xxxxxxxxxxxxxxx", InstName.Mov, InstEmit32.Mov, OpCodeT32MovImm16.Create); + SetT32("11110x101100xxxx0xxxxxxxxxxxxxxx", InstName.Movt, InstEmit32.Movt, OpCodeT32MovImm16.Create); + SetT32("111110110000xxxx1111xxxx0000xxxx", InstName.Mul, InstEmit32.Mul, OpCodeT32AluMla.Create); + SetT32("11101010011x11110xxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCodeT32AluRsImm.Create); + SetT32("11110x00011x11110xxxxxxxxxxxxxxx", InstName.Mvn, InstEmit32.Mvn, OpCodeT32AluImm.Create); + SetT32("11110011101011111000000000000000", InstName.Nop, InstEmit32.Nop, OpCodeT32.Create); + SetT32("11101010011x<<<<0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit32.Orn, OpCodeT32AluRsImm.Create); + SetT32("11110x00011x<<<<0xxxxxxxxxxxxxxx", InstName.Orn, InstEmit32.Orn, OpCodeT32AluImm.Create); + SetT32("11101010010x<<<<0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, OpCodeT32AluRsImm.Create); + SetT32("11110x00010x<<<<0xxxxxxxxxxxxxxx", InstName.Orr, InstEmit32.Orr, OpCodeT32AluImm.Create); + SetT32("1111100010x1xxxx1111xxxxxxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCodeT32.Create); + SetT32("1111100000x1xxxx11111100xxxxxxxx", InstName.Pld, InstEmit32.Nop, OpCodeT32.Create); + SetT32("1111100000x1xxxx1111000000xxxxxx", InstName.Pld, InstEmit32.Nop, OpCodeT32.Create); + SetT32("11101011110xxxxx0xxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCodeT32AluRsImm.Create); + SetT32("11110x01110xxxxx0xxxxxxxxxxxxxxx", InstName.Rsb, InstEmit32.Rsb, OpCodeT32AluImm.Create); + SetT32("111110101000xxxx1111xxxx0000xxxx", InstName.Sadd8, InstEmit32.Sadd8, OpCodeT32AluReg.Create); + SetT32("11101011011xxxxx0xxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCodeT32AluRsImm.Create); + SetT32("11110x01011xxxxx0xxxxxxxxxxxxxxx", InstName.Sbc, InstEmit32.Sbc, OpCodeT32AluImm.Create); + SetT32("111100110100xxxx0xxxxxxxxx0xxxxx", InstName.Sbfx, InstEmit32.Sbfx, OpCodeT32AluBf.Create); + SetT32("111110111001xxxx1111xxxx1111xxxx", InstName.Sdiv, InstEmit32.Sdiv, OpCodeT32AluMla.Create); + SetT32("111110101010xxxx1111xxxx1000xxxx", InstName.Sel, InstEmit32.Sel, 
OpCodeT32AluReg.Create); + SetT32("111110101000xxxx1111xxxx0010xxxx", InstName.Shadd8, InstEmit32.Shadd8, OpCodeT32AluReg.Create); + SetT32("111110101100xxxx1111xxxx0010xxxx", InstName.Shsub8, InstEmit32.Shsub8, OpCodeT32AluReg.Create); + SetT32("11110011101011111000000000000100", InstName.Sev, InstEmit32.Nop, OpCodeT32.Create); + SetT32("11110011101011111000000000000101", InstName.Sevl, InstEmit32.Nop, OpCodeT32.Create); + SetT32("111110110001xxxx<<<<xxxx00xxxxxx", InstName.Smla__, InstEmit32.Smla__, OpCodeT32AluMla.Create); + SetT32("111110111100xxxxxxxxxxxx0000xxxx", InstName.Smlal, InstEmit32.Smlal, OpCodeT32AluUmull.Create); + SetT32("111110111100xxxxxxxxxxxx10xxxxxx", InstName.Smlal__, InstEmit32.Smlal__, OpCodeT32AluUmull.Create); + SetT32("111110110011xxxx<<<<xxxx000xxxxx", InstName.Smlaw_, InstEmit32.Smlaw_, OpCodeT32AluMla.Create); + SetT32("111110110101xxxx<<<<xxxx000xxxxx", InstName.Smmla, InstEmit32.Smmla, OpCodeT32AluMla.Create); + SetT32("111110110110xxxxxxxxxxxx000xxxxx", InstName.Smmls, InstEmit32.Smmls, OpCodeT32AluMla.Create); + SetT32("111110110001xxxx1111xxxx00xxxxxx", InstName.Smul__, InstEmit32.Smul__, OpCodeT32AluMla.Create); + SetT32("111110111000xxxxxxxxxxxx0000xxxx", InstName.Smull, InstEmit32.Smull, OpCodeT32AluUmull.Create); + SetT32("111110110011xxxx1111xxxx000xxxxx", InstName.Smulw_, InstEmit32.Smulw_, OpCodeT32AluMla.Create); + SetT32("111110101100xxxx1111xxxx0000xxxx", InstName.Ssub8, InstEmit32.Ssub8, OpCodeT32AluReg.Create); + SetT32("111010001100xxxxxxxx111110101111", InstName.Stl, InstEmit32.Stl, OpCodeT32MemStEx.Create); + SetT32("111010001100xxxxxxxx111110001111", InstName.Stlb, InstEmit32.Stlb, OpCodeT32MemStEx.Create); + SetT32("111010001100xxxxxxxx11111110xxxx", InstName.Stlex, InstEmit32.Stlex, OpCodeT32MemStEx.Create); + SetT32("111010001100xxxxxxxx11111100xxxx", InstName.Stlexb, InstEmit32.Stlexb, OpCodeT32MemStEx.Create); + SetT32("111010001100xxxxxxxxxxxx1111xxxx", InstName.Stlexd, InstEmit32.Stlexd, OpCodeT32MemStEx.Create); + SetT32("111010001100xxxxxxxx11111101xxxx", InstName.Stlexh, InstEmit32.Stlexh, OpCodeT32MemStEx.Create); + SetT32("111010001100xxxxxxxx111110011111", InstName.Stlh, InstEmit32.Stlh, OpCodeT32MemStEx.Create); + SetT32("1110100010x0xxxx0xxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCodeT32MemMult.Create); + SetT32("1110100100x0xxxx0xxxxxxxxxxxxxxx", InstName.Stm, InstEmit32.Stm, OpCodeT32MemMult.Create); + SetT32("111110000100<<<<xxxx10x1xxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemImm8.Create); + SetT32("111110000100<<<<xxxx1100xxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemImm8.Create); + SetT32("111110000100<<<<xxxx11x1xxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemImm8.Create); + SetT32("111110001100<<<<xxxxxxxxxxxxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemImm12.Create); + SetT32("111110000100<<<<xxxx000000xxxxxx", InstName.Str, InstEmit32.Str, OpCodeT32MemRsImm.Create); + SetT32("111110000000<<<<xxxx10x1xxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemImm8.Create); + SetT32("111110000000<<<<xxxx1100xxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemImm8.Create); + SetT32("111110000000<<<<xxxx11x1xxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemImm8.Create); + SetT32("111110001000<<<<xxxxxxxxxxxxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemImm12.Create); + SetT32("111110000000<<<<xxxx000000xxxxxx", InstName.Strb, InstEmit32.Strb, OpCodeT32MemRsImm.Create); + SetT32("11101000x110<<<<xxxxxxxxxxxxxxxx", InstName.Strd, InstEmit32.Strd, OpCodeT32MemImm8D.Create); + 
SetT32("11101001x1x0<<<<xxxxxxxxxxxxxxxx", InstName.Strd, InstEmit32.Strd, OpCodeT32MemImm8D.Create); + SetT32("111110000010<<<<xxxx10x1xxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemImm8.Create); + SetT32("111110000010<<<<xxxx1100xxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemImm8.Create); + SetT32("111110000010<<<<xxxx11x1xxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemImm8.Create); + SetT32("111110001010<<<<xxxxxxxxxxxxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemImm12.Create); + SetT32("111110000010<<<<xxxx000000xxxxxx", InstName.Strh, InstEmit32.Strh, OpCodeT32MemRsImm.Create); + SetT32("11101011101<xxxx0xxx<<<<xxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT32AluRsImm.Create); + SetT32("11110x01101<xxxx0xxx<<<<xxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT32AluImm.Create); + SetT32("11110x101010xxxx0xxxxxxxxxxxxxxx", InstName.Sub, InstEmit32.Sub, OpCodeT32AluImm12.Create); + SetT32("111110100100xxxx1111xxxx10xxxxxx", InstName.Sxtb, InstEmit32.Sxtb, OpCodeT32AluUx.Create); + SetT32("111110100010xxxx1111xxxx10xxxxxx", InstName.Sxtb16, InstEmit32.Sxtb16, OpCodeT32AluUx.Create); + SetT32("111110100000xxxx1111xxxx10xxxxxx", InstName.Sxth, InstEmit32.Sxth, OpCodeT32AluUx.Create); + SetT32("111010001101xxxx111100000000xxxx", InstName.Tbb, InstEmit32.Tbb, OpCodeT32Tb.Create); + SetT32("111010001101xxxx111100000001xxxx", InstName.Tbh, InstEmit32.Tbh, OpCodeT32Tb.Create); + SetT32("111010101001xxxx0xxx1111xxxxxxxx", InstName.Teq, InstEmit32.Teq, OpCodeT32AluRsImm.Create); + SetT32("11110x001001xxxx0xxx1111xxxxxxxx", InstName.Teq, InstEmit32.Teq, OpCodeT32AluImm.Create); + SetT32("11110011101011111000000000010010", InstName.Tsb, InstEmit32.Nop, OpCodeT32.Create); // Trace Synchronization Barrier (FEAT_TRF) + SetT32("111010100001xxxx0xxx1111xxxxxxxx", InstName.Tst, InstEmit32.Tst, OpCodeT32AluRsImm.Create); + SetT32("11110x000001xxxx0xxx1111xxxxxxxx", InstName.Tst, InstEmit32.Tst, OpCodeT32AluImm.Create); + SetT32("111110101000xxxx1111xxxx0100xxxx", InstName.Uadd8, InstEmit32.Uadd8, OpCodeT32AluReg.Create); + SetT32("111100111100xxxx0xxxxxxxxx0xxxxx", InstName.Ubfx, InstEmit32.Ubfx, OpCodeT32AluBf.Create); + SetT32("111110111011xxxx1111xxxx1111xxxx", InstName.Udiv, InstEmit32.Udiv, OpCodeT32AluMla.Create); + SetT32("111110101000xxxx1111xxxx0110xxxx", InstName.Uhadd8, InstEmit32.Uhadd8, OpCodeT32AluReg.Create); + SetT32("111110101100xxxx1111xxxx0110xxxx", InstName.Uhsub8, InstEmit32.Uhsub8, OpCodeT32AluReg.Create); + SetT32("111110111110xxxxxxxxxxxx0110xxxx", InstName.Umaal, InstEmit32.Umaal, OpCodeT32AluUmull.Create); + SetT32("111110111110xxxxxxxxxxxx0000xxxx", InstName.Umlal, InstEmit32.Umlal, OpCodeT32AluUmull.Create); + SetT32("111110111010xxxxxxxxxxxx0000xxxx", InstName.Umull, InstEmit32.Umull, OpCodeT32AluUmull.Create); + SetT32("111110101100xxxx1111xxxx0100xxxx", InstName.Usub8, InstEmit32.Usub8, OpCodeT32AluReg.Create); + SetT32("111110100101xxxx1111xxxx10xxxxxx", InstName.Uxtb, InstEmit32.Uxtb, OpCodeT32AluUx.Create); + SetT32("111110100011xxxx1111xxxx10xxxxxx", InstName.Uxtb16, InstEmit32.Uxtb16, OpCodeT32AluUx.Create); + SetT32("111110100001xxxx1111xxxx10xxxxxx", InstName.Uxth, InstEmit32.Uxth, OpCodeT32AluUx.Create); + SetT32("11110011101011111000000000000010", InstName.Wfe, InstEmit32.Nop, OpCodeT32.Create); + SetT32("11110011101011111000000000000011", InstName.Wfi, InstEmit32.Nop, OpCodeT32.Create); + SetT32("11110011101011111000000000000001", InstName.Yield, InstEmit32.Nop, OpCodeT32.Create); +#endregion + + FillFastLookupTable(InstA32FastLookup, 
AllInstA32, ToFastLookupIndexA); + FillFastLookupTable(InstT32FastLookup, AllInstT32, ToFastLookupIndexT); + FillFastLookupTable(InstA64FastLookup, AllInstA64, ToFastLookupIndexA); + } + + private static void FillFastLookupTable(InstInfo[][] table, List<InstInfo> allInsts, Func<int, int> ToFastLookupIndex) + { + List<InstInfo>[] temp = new List<InstInfo>[FastLookupSize]; + + for (int index = 0; index < temp.Length; index++) + { + temp[index] = new List<InstInfo>(); + } + + foreach (InstInfo inst in allInsts) + { + int mask = ToFastLookupIndex(inst.Mask); + int value = ToFastLookupIndex(inst.Value); + + for (int index = 0; index < temp.Length; index++) + { + if ((index & mask) == value) + { + temp[index].Add(inst); + } + } + } + + for (int index = 0; index < temp.Length; index++) + { + table[index] = temp[index].ToArray(); + } + } + + private static void SetA32(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp) + { + Set(encoding, AllInstA32, new InstDescriptor(name, emitter), makeOp); + } + + private static void SetT16(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp) + { + encoding = "xxxxxxxxxxxxxxxx" + encoding; + Set(encoding, AllInstT32, new InstDescriptor(name, emitter), makeOp); + } + + private static void SetT32(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp) + { + string reversedEncoding = $"{encoding.AsSpan(16)}{encoding.AsSpan(0, 16)}"; + MakeOp reversedMakeOp = + (inst, address, opCode) + => makeOp(inst, address, (int)BitOperations.RotateRight((uint)opCode, 16)); + Set(reversedEncoding, AllInstT32, new InstDescriptor(name, emitter), reversedMakeOp); + } + + private static void SetVfp(string encoding, InstName name, InstEmitter emitter, MakeOp makeOpA32, MakeOp makeOpT32) + { + SetA32(encoding, name, emitter, makeOpA32); + + string thumbEncoding = encoding; + if (thumbEncoding.StartsWith("<<<<")) + { + thumbEncoding = $"1110{thumbEncoding.AsSpan(4)}"; + } + SetT32(thumbEncoding, name, emitter, makeOpT32); + } + + private static void SetAsimd(string encoding, InstName name, InstEmitter emitter, MakeOp makeOpA32, MakeOp makeOpT32) + { + SetA32(encoding, name, emitter, makeOpA32); + + string thumbEncoding = encoding; + if (thumbEncoding.StartsWith("11110100")) + { + thumbEncoding = $"11111001{encoding.AsSpan(8)}"; + } + else if (thumbEncoding.StartsWith("1111001x")) + { + thumbEncoding = $"111x1111{encoding.AsSpan(8)}"; + } + else if (thumbEncoding.StartsWith("11110010")) + { + thumbEncoding = $"11101111{encoding.AsSpan(8)}"; + } + else if (thumbEncoding.StartsWith("11110011")) + { + thumbEncoding = $"11111111{encoding.AsSpan(8)}"; + } + else + { + throw new ArgumentException("Invalid ASIMD instruction encoding"); + } + SetT32(thumbEncoding, name, emitter, makeOpT32); + } + + private static void SetA64(string encoding, InstName name, InstEmitter emitter, MakeOp makeOp) + { + Set(encoding, AllInstA64, new InstDescriptor(name, emitter), makeOp); + } + + private static void Set(string encoding, List<InstInfo> list, InstDescriptor inst, MakeOp makeOp) + { + int bit = encoding.Length - 1; + int value = 0; + int xMask = 0; + int xBits = 0; + + int[] xPos = new int[encoding.Length]; + + int blacklisted = 0; + + for (int index = 0; index < encoding.Length; index++, bit--) + { + // Note: < and > are used on special encodings. + // The < means that we should never have ALL bits with the '<' set. + // So, when the encoding has <<, it means that 00, 01, and 10 are valid, + // but not 11. <<< is 000, 001, ..., 110 but NOT 111, and so on... 
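+            // For example, a "<<" pair expands to three separate entries (the two bits set
+            // to 00, 01 and 10); only the all-ones pattern is dropped, via the mask !=
+            // blacklisted check in the expansion loop at the end of this method.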
+ // For >, the invalid value is zero. So, for >> 01, 10 and 11 are valid, + // but 00 isn't. + char chr = encoding[index]; + + if (chr == '1') + { + value |= 1 << bit; + } + else if (chr == 'x') + { + xMask |= 1 << bit; + } + else if (chr == '>') + { + xPos[xBits++] = bit; + } + else if (chr == '<') + { + xPos[xBits++] = bit; + + blacklisted |= 1 << bit; + } + else if (chr != '0') + { + throw new ArgumentException(nameof(encoding)); + } + } + + xMask = ~xMask; + + if (xBits == 0) + { + list.Add(new InstInfo(xMask, value, inst, makeOp)); + + return; + } + + for (int index = 0; index < (1 << xBits); index++) + { + int mask = 0; + + for (int x = 0; x < xBits; x++) + { + mask |= ((index >> x) & 1) << xPos[x]; + } + + if (mask != blacklisted) + { + list.Add(new InstInfo(xMask, value | mask, inst, makeOp)); + } + } + } + + public static (InstDescriptor inst, MakeOp makeOp) GetInstA32(int opCode) + { + return GetInstFromList(InstA32FastLookup[ToFastLookupIndexA(opCode)], opCode); + } + + public static (InstDescriptor inst, MakeOp makeOp) GetInstT32(int opCode) + { + return GetInstFromList(InstT32FastLookup[ToFastLookupIndexT(opCode)], opCode); + } + + public static (InstDescriptor inst, MakeOp makeOp) GetInstA64(int opCode) + { + return GetInstFromList(InstA64FastLookup[ToFastLookupIndexA(opCode)], opCode); + } + + private static (InstDescriptor inst, MakeOp makeOp) GetInstFromList(InstInfo[] insts, int opCode) + { + foreach (InstInfo info in insts) + { + if ((opCode & info.Mask) == info.Value) + { + return (info.Inst, info.MakeOp); + } + } + + return (new InstDescriptor(InstName.Und, InstEmit.Und), null); + } + + private static int ToFastLookupIndexA(int value) + { + return ((value >> 10) & 0x00F) | ((value >> 18) & 0xFF0); + } + + private static int ToFastLookupIndexT(int value) + { + return (value >> 4) & 0xFFF; + } + } +} diff --git a/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs b/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs new file mode 100644 index 00000000..17c17812 --- /dev/null +++ b/src/ARMeilleure/Decoders/Optimizations/TailCallRemover.cs @@ -0,0 +1,88 @@ +using System; +using System.Collections.Generic; + +namespace ARMeilleure.Decoders.Optimizations +{ + static class TailCallRemover + { + public static Block[] RunPass(ulong entryAddress, List<Block> blocks) + { + // Detect tail calls: + // - Assume this function spans the space covered by contiguous code blocks surrounding the entry address. + // - A jump to an area outside this contiguous region will be treated as an exit block. + // - Include a small allowance for jumps outside the contiguous range. + + if (!Decoder.BinarySearch(blocks, entryAddress, out int entryBlockId)) + { + throw new InvalidOperationException("Function entry point is not contained in a block."); + } + + const ulong allowance = 4; + + Block entryBlock = blocks[entryBlockId]; + + Block startBlock = entryBlock; + Block endBlock = entryBlock; + + int startBlockIndex = entryBlockId; + int endBlockIndex = entryBlockId; + + for (int i = entryBlockId + 1; i < blocks.Count; i++) // Search forwards. + { + Block block = blocks[i]; + + if (endBlock.EndAddress < block.Address - allowance) + { + break; // End of contiguous function. + } + + endBlock = block; + endBlockIndex = i; + } + + for (int i = entryBlockId - 1; i >= 0; i--) // Search backwards. + { + Block block = blocks[i]; + + if (startBlock.Address > block.EndAddress + allowance) + { + break; // End of contiguous function. 
+ } + + startBlock = block; + startBlockIndex = i; + } + + if (startBlockIndex == 0 && endBlockIndex == blocks.Count - 1) + { + return blocks.ToArray(); // Nothing to do here. + } + + // Mark branches whose target is outside of the contiguous region as an exit block. + for (int i = startBlockIndex; i <= endBlockIndex; i++) + { + Block block = blocks[i]; + + if (block.Branch != null && (block.Branch.Address > endBlock.EndAddress || block.Branch.EndAddress < startBlock.Address)) + { + block.Branch.Exit = true; + } + } + + var newBlocks = new List<Block>(blocks.Count); + + // Finally, rebuild decoded block list, ignoring blocks outside the contiguous range. + for (int i = 0; i < blocks.Count; i++) + { + Block block = blocks[i]; + + if (block.Exit || (i >= startBlockIndex && i <= endBlockIndex)) + { + newBlocks.Add(block); + } + } + + return newBlocks.ToArray(); + } + } +} diff --git a/src/ARMeilleure/Decoders/RegisterSize.cs b/src/ARMeilleure/Decoders/RegisterSize.cs new file mode 100644 index 00000000..c9cea03e --- /dev/null +++ b/src/ARMeilleure/Decoders/RegisterSize.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + enum RegisterSize + { + Int32, + Int64, + Simd64, + Simd128 + } +}
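The four members pair off into two integer views (the 32-bit W and 64-bit X forms of a general-purpose register) and two vector views (the 64-bit D and 128-bit Q forms of a SIMD register). A minimal sketch of how an emitter can map the integer sizes onto IR operand types (illustrative only; it assumes the OperandType enum that appears later in this patch):

    static OperandType GetIntOperandType(RegisterSize size)
    {
        // W registers are 32-bit views of X registers, so only the width differs.
        return size == RegisterSize.Int32 ? OperandType.I32 : OperandType.I64;
    }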
\ No newline at end of file diff --git a/src/ARMeilleure/Decoders/ShiftType.cs b/src/ARMeilleure/Decoders/ShiftType.cs new file mode 100644 index 00000000..8583f16a --- /dev/null +++ b/src/ARMeilleure/Decoders/ShiftType.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Decoders +{ + enum ShiftType + { + Lsl = 0, + Lsr = 1, + Asr = 2, + Ror = 3 + } +}
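The explicit values 0 through 3 are significant: they mirror the two-bit shift-type field of the AArch32 data-processing encodings, so a decoder can cast the raw field straight to the enum. A minimal sketch of that idea (the bit position assumes the A32 register-operand layout, where the type field occupies bits [6:5]; illustrative, not a quote from the decoders):

    static ShiftType DecodeShiftType(int opCode)
    {
        // Two-bit field: 00 = LSL, 01 = LSR, 10 = ASR, 11 = ROR.
        return (ShiftType)((opCode >> 5) & 3);
    }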
\ No newline at end of file diff --git a/src/ARMeilleure/Diagnostics/IRDumper.cs b/src/ARMeilleure/Diagnostics/IRDumper.cs new file mode 100644 index 00000000..3d1a60e5 --- /dev/null +++ b/src/ARMeilleure/Diagnostics/IRDumper.cs @@ -0,0 +1,311 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; + +namespace ARMeilleure.Diagnostics +{ + class IRDumper + { + private const string Indentation = " "; + + private int _indentLevel; + + private readonly StringBuilder _builder; + + private readonly Dictionary<Operand, string> _localNames; + private readonly Dictionary<ulong, string> _symbolNames; + + public IRDumper(int indent) + { + _indentLevel = indent; + + _builder = new StringBuilder(); + + _localNames = new Dictionary<Operand, string>(); + _symbolNames = new Dictionary<ulong, string>(); + } + + private void Indent() + { + _builder.EnsureCapacity(_builder.Capacity + _indentLevel * Indentation.Length); + + for (int index = 0; index < _indentLevel; index++) + { + _builder.Append(Indentation); + } + } + + private void IncreaseIndentation() + { + _indentLevel++; + } + + private void DecreaseIndentation() + { + _indentLevel--; + } + + private void DumpBlockName(BasicBlock block) + { + _builder.Append("block").Append(block.Index); + } + + private void DumpBlockHeader(BasicBlock block) + { + DumpBlockName(block); + + if (block.Frequency == BasicBlockFrequency.Cold) + { + _builder.Append(" cold"); + } + + if (block.SuccessorsCount > 0) + { + _builder.Append(" ("); + + for (int i = 0; i < block.SuccessorsCount; i++) + { + DumpBlockName(block.GetSuccessor(i)); + + if (i < block.SuccessorsCount - 1) + { + _builder.Append(", "); + } + } + + _builder.Append(')'); + } + + _builder.Append(':'); + } + + private void DumpOperand(Operand operand) + { + if (operand == default) + { + _builder.Append("<NULL>"); + return; + } + + _builder.Append(GetTypeName(operand.Type)).Append(' '); + + switch (operand.Kind) + { + case OperandKind.LocalVariable: + if (!_localNames.TryGetValue(operand, out string localName)) + { + localName = $"%{_localNames.Count}"; + + _localNames.Add(operand, localName); + } + + _builder.Append(localName); + break; + + case OperandKind.Register: + Register reg = operand.GetRegister(); + + switch (reg.Type) + { + case RegisterType.Flag: _builder.Append('b'); break; + case RegisterType.FpFlag: _builder.Append('f'); break; + case RegisterType.Integer: _builder.Append('r'); break; + case RegisterType.Vector: _builder.Append('v'); break; + } + + _builder.Append(reg.Index); + break; + + case OperandKind.Constant: + string symbolName = Symbols.Get(operand.Value); + + if (symbolName != null && !_symbolNames.ContainsKey(operand.Value)) + { + _symbolNames.Add(operand.Value, symbolName); + } + + _builder.Append("0x").Append(operand.Value.ToString("X")); + break; + + case OperandKind.Memory: + var memOp = operand.GetMemory(); + + _builder.Append('['); + + DumpOperand(memOp.BaseAddress); + + if (memOp.Index != default) + { + _builder.Append(" + "); + + DumpOperand(memOp.Index); + + switch (memOp.Scale) + { + case Multiplier.x2: _builder.Append("*2"); break; + case Multiplier.x4: _builder.Append("*4"); break; + case Multiplier.x8: _builder.Append("*8"); break; + } + } + + if (memOp.Displacement != 0) + { + _builder.Append(" + 0x").Append(memOp.Displacement.ToString("X")); + } + + _builder.Append(']'); + break; + + default: + _builder.Append(operand.Type); + break; + } + } + + private void 
DumpNode(ControlFlowGraph cfg, Operation node) + { + for (int index = 0; index < node.DestinationsCount; index++) + { + DumpOperand(node.GetDestination(index)); + + if (index == node.DestinationsCount - 1) + { + _builder.Append(" = "); + } + else + { + _builder.Append(", "); + } + } + + switch (node) + { + case Operation operation: + if (operation.Instruction == Instruction.Phi) + { + PhiOperation phi = operation.AsPhi(); + + _builder.Append("Phi "); + + for (int index = 0; index < phi.SourcesCount; index++) + { + _builder.Append('('); + + DumpBlockName(phi.GetBlock(cfg, index)); + + _builder.Append(": "); + + DumpOperand(phi.GetSource(index)); + + _builder.Append(')'); + + if (index < phi.SourcesCount - 1) + { + _builder.Append(", "); + } + } + + break; + } + + bool comparison = false; + + _builder.Append(operation.Instruction); + + if (operation.Instruction == Instruction.Extended) + { + _builder.Append('.').Append(operation.Intrinsic); + } + else if (operation.Instruction == Instruction.BranchIf || + operation.Instruction == Instruction.Compare) + { + comparison = true; + } + + _builder.Append(' '); + + for (int index = 0; index < operation.SourcesCount; index++) + { + Operand source = operation.GetSource(index); + + if (index < operation.SourcesCount - 1) + { + DumpOperand(source); + + _builder.Append(", "); + } + else if (comparison) + { + _builder.Append((Comparison)source.AsInt32()); + } + else + { + DumpOperand(source); + } + } + break; + } + + if (_symbolNames.Count == 1) + { + _builder.Append(" ;; ").Append(_symbolNames.First().Value); + } + else if (_symbolNames.Count > 1) + { + _builder.Append(" ;;"); + + foreach ((ulong value, string name) in _symbolNames) + { + _builder.Append(" 0x").Append(value.ToString("X")).Append(" = ").Append(name); + } + } + + // Reset the set of symbols for the next Node we're going to dump. + _symbolNames.Clear(); + } + + public static string GetDump(ControlFlowGraph cfg) + { + var dumper = new IRDumper(1); + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + dumper.Indent(); + dumper.DumpBlockHeader(block); + + dumper._builder.AppendLine(); + + dumper.IncreaseIndentation(); + + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + dumper.Indent(); + dumper.DumpNode(cfg, node); + + dumper._builder.AppendLine(); + } + + dumper.DecreaseIndentation(); + } + + return dumper._builder.ToString(); + } + + private static string GetTypeName(OperandType type) + { + return type switch + { + OperandType.None => "none", + OperandType.I32 => "i32", + OperandType.I64 => "i64", + OperandType.FP32 => "f32", + OperandType.FP64 => "f64", + OperandType.V128 => "v128", + _ => throw new ArgumentException($"Invalid operand type \"{type}\"."), + }; + } + } +}
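Put together, GetDump emits one header line per block followed by one line per operation, with local variables renamed to compact %N names. The shape of the output, reconstructed from the Append calls above (illustrative, not captured output):

    block0 (block1):
      i64 %0 = Add i64 r0, i64 0x1
      BranchIf i64 %0, i64 0x0, NotEqual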
\ No newline at end of file diff --git a/src/ARMeilleure/Diagnostics/Logger.cs b/src/ARMeilleure/Diagnostics/Logger.cs new file mode 100644 index 00000000..07a60667 --- /dev/null +++ b/src/ARMeilleure/Diagnostics/Logger.cs @@ -0,0 +1,56 @@ +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +namespace ARMeilleure.Diagnostics +{ + static class Logger + { + private static long _startTime; + + private static long[] _accumulatedTime; + + static Logger() + { + _accumulatedTime = new long[(int)PassName.Count]; + } + + [Conditional("M_DEBUG")] + public static void StartPass(PassName name) + { + WriteOutput(name + " pass started..."); + + _startTime = Stopwatch.GetTimestamp(); + } + + [Conditional("M_DEBUG")] + public static void EndPass(PassName name, ControlFlowGraph cfg) + { + EndPass(name); + + WriteOutput("IR after " + name + " pass:"); + + WriteOutput(IRDumper.GetDump(cfg)); + } + + [Conditional("M_DEBUG")] + public static void EndPass(PassName name) + { + long elapsedTime = Stopwatch.GetTimestamp() - _startTime; + + _accumulatedTime[(int)name] += elapsedTime; + + WriteOutput($"{name} pass ended after {GetMilliseconds(_accumulatedTime[(int)name])} ms..."); + } + + private static long GetMilliseconds(long ticks) + { + return (long)(((double)ticks / Stopwatch.Frequency) * 1000); + } + + private static void WriteOutput(string text) + { + Console.WriteLine(text); + } + } +}
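Because every public method here is marked [Conditional("M_DEBUG")], the calls vanish entirely from builds that do not define the M_DEBUG symbol; timing and IR dumps cost nothing otherwise. The intended bracketing around a pass looks like this (a sketch; cfg is the ControlFlowGraph under compilation, and Optimizer.RunPass stands in for any pass over it):

    Logger.StartPass(PassName.Optimization);
    Optimizer.RunPass(cfg);
    Logger.EndPass(PassName.Optimization, cfg); // logs the time, then dumps the post-pass IR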
\ No newline at end of file diff --git a/src/ARMeilleure/Diagnostics/PassName.cs b/src/ARMeilleure/Diagnostics/PassName.cs new file mode 100644 index 00000000..e34bf0d2 --- /dev/null +++ b/src/ARMeilleure/Diagnostics/PassName.cs @@ -0,0 +1,19 @@ +namespace ARMeilleure.Diagnostics +{ + enum PassName + { + Decoding, + Translation, + RegisterUsage, + TailMerge, + Dominance, + SsaConstruction, + RegisterToLocal, + Optimization, + PreAllocation, + RegisterAllocation, + CodeGeneration, + + Count + } +}
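The trailing Count member is a sentinel rather than a real pass: keeping it last lets callers size per-pass storage straight from the enum, which is exactly how the logger above allocates its accumulated-time array:

    // One slot per pass; Count stays correct as long as new passes are added above it.
    long[] accumulatedTime = new long[(int)PassName.Count];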
\ No newline at end of file diff --git a/src/ARMeilleure/Diagnostics/Symbols.cs b/src/ARMeilleure/Diagnostics/Symbols.cs new file mode 100644 index 00000000..6bde62f5 --- /dev/null +++ b/src/ARMeilleure/Diagnostics/Symbols.cs @@ -0,0 +1,84 @@ +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Diagnostics; + +namespace ARMeilleure.Diagnostics +{ + static class Symbols + { + private readonly struct RangedSymbol + { + public readonly ulong Start; + public readonly ulong End; + public readonly ulong ElementSize; + public readonly string Name; + + public RangedSymbol(ulong start, ulong end, ulong elemSize, string name) + { + Start = start; + End = end; + ElementSize = elemSize; + Name = name; + } + } + + private static readonly ConcurrentDictionary<ulong, string> _symbols; + private static readonly List<RangedSymbol> _rangedSymbols; + + static Symbols() + { + _symbols = new ConcurrentDictionary<ulong, string>(); + _rangedSymbols = new List<RangedSymbol>(); + } + + public static string Get(ulong address) + { + string result; + + if (_symbols.TryGetValue(address, out result)) + { + return result; + } + + lock (_rangedSymbols) + { + foreach (RangedSymbol symbol in _rangedSymbols) + { + if (address >= symbol.Start && address <= symbol.End) + { + ulong diff = address - symbol.Start; + ulong rem = diff % symbol.ElementSize; + + result = symbol.Name + "_" + diff / symbol.ElementSize; + + if (rem != 0) + { + result += "+" + rem; + } + + _symbols.TryAdd(address, result); + + return result; + } + } + } + + return null; + } + + [Conditional("M_DEBUG")] + public static void Add(ulong address, string name) + { + _symbols.TryAdd(address, name); + } + + [Conditional("M_DEBUG")] + public static void Add(ulong address, ulong size, ulong elemSize, string name) + { + lock (_rangedSymbols) + { + _rangedSymbols.Add(new RangedSymbol(address, address + size, elemSize, name)); + } + } + } +} diff --git a/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs b/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs new file mode 100644 index 00000000..a4f17844 --- /dev/null +++ b/src/ARMeilleure/Diagnostics/TranslatorEventSource.cs @@ -0,0 +1,67 @@ +using System.Diagnostics.Tracing; +using System.Threading; + +namespace ARMeilleure.Diagnostics +{ + [EventSource(Name = "ARMeilleure")] + class TranslatorEventSource : EventSource + { + public static readonly TranslatorEventSource Log = new(); + + private int _rejitQueue; + private ulong _funcTabSize; + private ulong _funcTabLeafSize; + private PollingCounter _rejitQueueCounter; + private PollingCounter _funcTabSizeCounter; + private PollingCounter _funcTabLeafSizeCounter; + + public TranslatorEventSource() + { + _rejitQueueCounter = new PollingCounter("rejit-queue-length", this, () => _rejitQueue) + { + DisplayName = "Rejit Queue Length" + }; + + _funcTabSizeCounter = new PollingCounter("addr-tab-alloc", this, () => _funcTabSize / 1024d / 1024d) + { + DisplayName = "AddressTable Total Bytes Allocated", + DisplayUnits = "MiB" + }; + + _funcTabLeafSizeCounter = new PollingCounter("addr-tab-leaf-alloc", this, () => _funcTabLeafSize / 1024d / 1024d) + { + DisplayName = "AddressTable Total Leaf Bytes Allocated", + DisplayUnits = "MiB" + }; + } + + public void RejitQueueAdd(int count) + { + Interlocked.Add(ref _rejitQueue, count); + } + + public void AddressTableAllocated(int bytes, bool leaf) + { + _funcTabSize += (uint)bytes; + + if (leaf) + { + _funcTabLeafSize += (uint)bytes; + } + } + + protected override void Dispose(bool disposing) + { + 
_rejitQueueCounter.Dispose(); + _rejitQueueCounter = null; + + _funcTabLeafSizeCounter.Dispose(); + _funcTabLeafSizeCounter = null; + + _funcTabSizeCounter.Dispose(); + _funcTabSizeCounter = null; + + base.Dispose(disposing); + } + } +} diff --git a/src/ARMeilleure/Instructions/CryptoHelper.cs b/src/ARMeilleure/Instructions/CryptoHelper.cs new file mode 100644 index 00000000..e517c75d --- /dev/null +++ b/src/ARMeilleure/Instructions/CryptoHelper.cs @@ -0,0 +1,280 @@ +// https://www.intel.com/content/dam/doc/white-paper/advanced-encryption-standard-new-instructions-set-paper.pdf + +using ARMeilleure.State; +using System; + +namespace ARMeilleure.Instructions +{ + static class CryptoHelper + { +#region "LookUp Tables" + private static ReadOnlySpan<byte> _sBox => new byte[] + { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 + }; + + private static ReadOnlySpan<byte> _invSBox => new byte[] + { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 
0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d + }; + + private static ReadOnlySpan<byte> _gfMul02 => new byte[] + { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, + 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, + 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, + 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, + 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, + 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, + 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, + 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, + 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, + 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 + }; + + private static ReadOnlySpan<byte> _gfMul03 => new byte[] + { + 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, + 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, + 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, + 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, + 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, + 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, + 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, + 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, + 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, + 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, + 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, + 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, + 0x3b, 0x38, 0x3d, 0x3e, 0x37, 
0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, + 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a + }; + + private static ReadOnlySpan<byte> _gfMul09 => new byte[] + { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, + 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, + 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, + 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, + 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, + 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, + 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, + 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, + 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, + 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, + 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, + 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, + 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 + }; + + private static ReadOnlySpan<byte> _gfMul0B => new byte[] + { + 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, + 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, + 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, + 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, + 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, + 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, + 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, + 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, + 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, + 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, + 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, + 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, + 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, + 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, + 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, + 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 + }; + + private static ReadOnlySpan<byte> _gfMul0D => new byte[] + { + 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 
0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, + 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, + 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, + 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, + 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, + 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, + 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, + 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, + 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, + 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, + 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, + 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, + 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 + }; + + private static ReadOnlySpan<byte> _gfMul0E => new byte[] + { + 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, + 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, + 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, + 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, + 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, + 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, + 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, + 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, + 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, + 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, + 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, + 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d + }; + + private static ReadOnlySpan<byte> _srPerm => new byte[] + { + 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 + }; + + private static ReadOnlySpan<byte> _isrPerm => new byte[] + { + 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11 + }; +#endregion + + public static V128 AesInvMixColumns(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for 
(int columns = 0; columns <= 3; columns++) + { + int idx = columns << 2; + + byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3] + byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3] + byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3] + byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3] + + outState[idx + 0] = (byte)((uint)_gfMul0E[row0] ^ _gfMul0B[row1] ^ _gfMul0D[row2] ^ _gfMul09[row3]); + outState[idx + 1] = (byte)((uint)_gfMul09[row0] ^ _gfMul0E[row1] ^ _gfMul0B[row2] ^ _gfMul0D[row3]); + outState[idx + 2] = (byte)((uint)_gfMul0D[row0] ^ _gfMul09[row1] ^ _gfMul0E[row2] ^ _gfMul0B[row3]); + outState[idx + 3] = (byte)((uint)_gfMul0B[row0] ^ _gfMul0D[row1] ^ _gfMul09[row2] ^ _gfMul0E[row3]); + } + + return new V128(outState); + } + + public static V128 AesInvShiftRows(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[_isrPerm[idx]] = inState[idx]; + } + + return new V128(outState); + } + + public static V128 AesInvSubBytes(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[idx] = _invSBox[inState[idx]]; + } + + return new V128(outState); + } + + public static V128 AesMixColumns(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int columns = 0; columns <= 3; columns++) + { + int idx = columns << 2; + + byte row0 = inState[idx + 0]; // A, E, I, M: [row0, col0-col3] + byte row1 = inState[idx + 1]; // B, F, J, N: [row1, col0-col3] + byte row2 = inState[idx + 2]; // C, G, K, O: [row2, col0-col3] + byte row3 = inState[idx + 3]; // D, H, L, P: [row3, col0-col3] + + outState[idx + 0] = (byte)((uint)_gfMul02[row0] ^ _gfMul03[row1] ^ row2 ^ row3); + outState[idx + 1] = (byte)((uint)row0 ^ _gfMul02[row1] ^ _gfMul03[row2] ^ row3); + outState[idx + 2] = (byte)((uint)row0 ^ row1 ^ _gfMul02[row2] ^ _gfMul03[row3]); + outState[idx + 3] = (byte)((uint)_gfMul03[row0] ^ row1 ^ row2 ^ _gfMul02[row3]); + } + + return new V128(outState); + } + + public static V128 AesShiftRows(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[_srPerm[idx]] = inState[idx]; + } + + return new V128(outState); + } + + public static V128 AesSubBytes(V128 op) + { + byte[] inState = op.ToArray(); + byte[] outState = new byte[16]; + + for (int idx = 0; idx <= 15; idx++) + { + outState[idx] = _sBox[inState[idx]]; + } + + return new V128(outState); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitAlu.cs b/src/ARMeilleure/Instructions/InstEmitAlu.cs new file mode 100644 index 00000000..e0d10e77 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitAlu.cs @@ -0,0 +1,400 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Adc(ArmEmitterContext context) => EmitAdc(context, setFlags: false); + public static void Adcs(ArmEmitterContext context) => EmitAdc(context, setFlags: true); + + private static void EmitAdc(ArmEmitterContext context, bool setFlags) + { + Operand n = GetAluN(context); + Operand m = 
GetAluM(context); + + Operand d = context.Add(n, m); + + Operand carry = GetFlag(PState.CFlag); + + if (context.CurrOp.RegisterSize == RegisterSize.Int64) + { + carry = context.ZeroExtend32(OperandType.I64, carry); + } + + d = context.Add(d, carry); + + if (setFlags) + { + EmitNZFlagsCheck(context, d); + + EmitAdcsCCheck(context, n, d); + EmitAddsVCheck(context, n, m, d); + } + + SetAluDOrZR(context, d); + } + + public static void Add(ArmEmitterContext context) + { + SetAluD(context, context.Add(GetAluN(context), GetAluM(context))); + } + + public static void Adds(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + context.MarkComparison(n, m); + + Operand d = context.Add(n, m); + + EmitNZFlagsCheck(context, d); + + EmitAddsCCheck(context, n, d); + EmitAddsVCheck(context, n, m, d); + + SetAluDOrZR(context, d); + } + + public static void And(ArmEmitterContext context) + { + SetAluD(context, context.BitwiseAnd(GetAluN(context), GetAluM(context))); + } + + public static void Ands(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.BitwiseAnd(n, m); + + EmitNZFlagsCheck(context, d); + EmitCVFlagsClear(context); + + SetAluDOrZR(context, d); + } + + public static void Asrv(ArmEmitterContext context) + { + SetAluDOrZR(context, context.ShiftRightSI(GetAluN(context), GetAluMShift(context))); + } + + public static void Bic(ArmEmitterContext context) => EmitBic(context, setFlags: false); + public static void Bics(ArmEmitterContext context) => EmitBic(context, setFlags: true); + + private static void EmitBic(ArmEmitterContext context, bool setFlags) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.BitwiseAnd(n, context.BitwiseNot(m)); + + if (setFlags) + { + EmitNZFlagsCheck(context, d); + EmitCVFlagsClear(context); + } + + SetAluD(context, d, setFlags); + } + + public static void Cls(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + Operand nHigh = context.ShiftRightUI(n, Const(1)); + + bool is32Bits = op.RegisterSize == RegisterSize.Int32; + + Operand mask = is32Bits ? 
Const(int.MaxValue) : Const(long.MaxValue); + + Operand nLow = context.BitwiseAnd(n, mask); + + Operand res = context.CountLeadingZeros(context.BitwiseExclusiveOr(nHigh, nLow)); + + res = context.Subtract(res, Const(res.Type, 1)); + + SetAluDOrZR(context, res); + } + + public static void Clz(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + Operand d = context.CountLeadingZeros(n); + + SetAluDOrZR(context, d); + } + + public static void Eon(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.BitwiseExclusiveOr(n, context.BitwiseNot(m)); + + SetAluD(context, d); + } + + public static void Eor(ArmEmitterContext context) + { + SetAluD(context, context.BitwiseExclusiveOr(GetAluN(context), GetAluM(context))); + } + + public static void Extr(ArmEmitterContext context) + { + OpCodeAluRs op = (OpCodeAluRs)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rm); + + if (op.Shift != 0) + { + if (op.Rn == op.Rm) + { + res = context.RotateRight(res, Const(op.Shift)); + } + else + { + res = context.ShiftRightUI(res, Const(op.Shift)); + + Operand n = GetIntOrZR(context, op.Rn); + + int invShift = op.GetBitsCount() - op.Shift; + + res = context.BitwiseOr(res, context.ShiftLeft(n, Const(invShift))); + } + } + + SetAluDOrZR(context, res); + } + + public static void Lslv(ArmEmitterContext context) + { + SetAluDOrZR(context, context.ShiftLeft(GetAluN(context), GetAluMShift(context))); + } + + public static void Lsrv(ArmEmitterContext context) + { + SetAluDOrZR(context, context.ShiftRightUI(GetAluN(context), GetAluMShift(context))); + } + + public static void Sbc(ArmEmitterContext context) => EmitSbc(context, setFlags: false); + public static void Sbcs(ArmEmitterContext context) => EmitSbc(context, setFlags: true); + + private static void EmitSbc(ArmEmitterContext context, bool setFlags) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.Subtract(n, m); + + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + if (context.CurrOp.RegisterSize == RegisterSize.Int64) + { + borrow = context.ZeroExtend32(OperandType.I64, borrow); + } + + d = context.Subtract(d, borrow); + + if (setFlags) + { + EmitNZFlagsCheck(context, d); + + EmitSbcsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, d); + } + + SetAluDOrZR(context, d); + } + + public static void Sub(ArmEmitterContext context) + { + SetAluD(context, context.Subtract(GetAluN(context), GetAluM(context))); + } + + public static void Subs(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + context.MarkComparison(n, m); + + Operand d = context.Subtract(n, m); + + EmitNZFlagsCheck(context, d); + + EmitSubsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, d); + + SetAluDOrZR(context, d); + } + + public static void Orn(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand d = context.BitwiseOr(n, context.BitwiseNot(m)); + + SetAluD(context, d); + } + + public static void Orr(ArmEmitterContext context) + { + SetAluD(context, context.BitwiseOr(GetAluN(context), GetAluM(context))); + } + + public static void Rbit(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand d; + + if (op.RegisterSize == RegisterSize.Int32) + { + d = EmitReverseBits32Op(context, n); + } + else + { + d = 
EmitReverseBits64Op(context, n); + } + + SetAluDOrZR(context, d); + } + + private static Operand EmitReverseBits64Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I64); + + Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaaaaaaaaaaul)), Const(1)), + context.ShiftLeft (context.BitwiseAnd(op, Const(0x5555555555555555ul)), Const(1))); + + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccccccccccul)), Const(2)), + context.ShiftLeft (context.BitwiseAnd(val, Const(0x3333333333333333ul)), Const(2))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0f0f0f0f0ul)), Const(4)), + context.ShiftLeft (context.BitwiseAnd(val, Const(0x0f0f0f0f0f0f0f0ful)), Const(4))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00ff00ff00ul)), Const(8)), + context.ShiftLeft (context.BitwiseAnd(val, Const(0x00ff00ff00ff00fful)), Const(8))); + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xffff0000ffff0000ul)), Const(16)), + context.ShiftLeft (context.BitwiseAnd(val, Const(0x0000ffff0000fffful)), Const(16))); + + return context.BitwiseOr(context.ShiftRightUI(val, Const(32)), context.ShiftLeft(val, Const(32))); + } + + public static void Rev16(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand d; + + if (op.RegisterSize == RegisterSize.Int32) + { + d = EmitReverseBytes16_32Op(context, n); + } + else + { + d = EmitReverseBytes16_64Op(context, n); + } + + SetAluDOrZR(context, d); + } + + public static void Rev32(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand d; + + if (op.RegisterSize == RegisterSize.Int32) + { + d = context.ByteSwap(n); + } + else + { + d = EmitReverseBytes32_64Op(context, n); + } + + SetAluDOrZR(context, d); + } + + private static Operand EmitReverseBytes32_64Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I64); + + Operand val = EmitReverseBytes16_64Op(context, op); + + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xffff0000ffff0000ul)), Const(16)), + context.ShiftLeft (context.BitwiseAnd(val, Const(0x0000ffff0000fffful)), Const(16))); + } + + public static void Rev64(ArmEmitterContext context) + { + OpCodeAlu op = (OpCodeAlu)context.CurrOp; + + SetAluDOrZR(context, context.ByteSwap(GetIntOrZR(context, op.Rn))); + } + + public static void Rorv(ArmEmitterContext context) + { + SetAluDOrZR(context, context.RotateRight(GetAluN(context), GetAluMShift(context))); + } + + private static Operand GetAluMShift(ArmEmitterContext context) + { + IOpCodeAluRs op = (IOpCodeAluRs)context.CurrOp; + + Operand m = GetIntOrZR(context, op.Rm); + + if (op.RegisterSize == RegisterSize.Int64) + { + m = context.ConvertI64ToI32(m); + } + + return context.BitwiseAnd(m, Const(context.CurrOp.GetBitsCount() - 1)); + } + + private static void EmitCVFlagsClear(ArmEmitterContext context) + { + SetFlag(context, PState.CFlag, Const(0)); + SetFlag(context, PState.VFlag, Const(0)); + } + + public static void SetAluD(ArmEmitterContext context, Operand d) + { + SetAluD(context, d, x31IsZR: false); + } + + public static void SetAluDOrZR(ArmEmitterContext context, Operand d) + { + SetAluD(context, d, x31IsZR: true); + } + + public static void SetAluD(ArmEmitterContext context, Operand d, bool x31IsZR) + { + 
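+ // On AArch64, register index 31 encodes either the zero register (ZR) or
+ // SP depending on the instruction form: flag-setting and shifted-register
+ // ops treat Rd == 31 as ZR (the write is discarded below), while the other
+ // ALU forms treat it as SP.
+ 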
IOpCodeAlu op = (IOpCodeAlu)context.CurrOp; + + if ((x31IsZR || op is IOpCodeAluRs) && op.Rd == RegisterConsts.ZeroIndex) + { + return; + } + + SetIntOrSP(context, op.Rd, d); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitAlu32.cs b/src/ARMeilleure/Instructions/InstEmitAlu32.cs new file mode 100644 index 00000000..584ada7e --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitAlu32.cs @@ -0,0 +1,931 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Add(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + + EmitAddsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void Adc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Add(n, m); + + Operand carry = GetFlag(PState.CFlag); + + res = context.Add(res, carry); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + + EmitAdcsCCheck(context, n, res); + EmitAddsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void And(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, m); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Bfc(ArmEmitterContext context) + { + IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp; + + Operand d = GetIntA32(context, op.Rd); + Operand res = context.BitwiseAnd(d, Const(~op.DestMask)); + + SetIntA32(context, op.Rd, res); + } + + public static void Bfi(ArmEmitterContext context) + { + IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand d = GetIntA32(context, op.Rd); + Operand part = context.BitwiseAnd(n, Const(op.SourceMask)); + + if (op.Lsb != 0) + { + part = context.ShiftLeft(part, Const(op.Lsb)); + } + + Operand res = context.BitwiseAnd(d, Const(~op.DestMask)); + res = context.BitwiseOr(res, context.BitwiseAnd(part, Const(op.DestMask))); + + SetIntA32(context, op.Rd, res); + } + + public static void Bic(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, context.BitwiseNot(m)); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Clz(ArmEmitterContext context) + { + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.CountLeadingZeros(m); + EmitAluStore(context, res); + } + + public static void Cmp(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + 
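+ // CMP is SUBS with the result discarded: only the NZCV flags are updated.
+ 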
EmitNZFlagsCheck(context, res);
+
+ EmitSubsCCheck(context, n, res);
+ EmitSubsVCheck(context, n, m, res);
+ }
+
+ public static void Cmn(ArmEmitterContext context)
+ {
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context, setCarry: false);
+
+ Operand res = context.Add(n, m);
+
+ EmitNZFlagsCheck(context, res);
+
+ EmitAddsCCheck(context, n, res);
+ EmitAddsVCheck(context, n, m, res);
+ }
+
+ public static void Eor(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseExclusiveOr(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mov(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand m = GetAluM(context);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, m);
+ }
+
+ EmitAluStore(context, m);
+ }
+
+ public static void Movt(ArmEmitterContext context)
+ {
+ IOpCode32AluImm16 op = (IOpCode32AluImm16)context.CurrOp;
+
+ Operand d = GetIntA32(context, op.Rd);
+ Operand imm = Const(op.Immediate << 16); // Immediate value as top halfword.
+ Operand res = context.BitwiseAnd(d, Const(0x0000ffff));
+ res = context.BitwiseOr(res, imm);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mul(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.Multiply(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Mvn(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseNot(m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Orr(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseOr(n, m);
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Orn(ArmEmitterContext context)
+ {
+ IOpCode32Alu op = (IOpCode32Alu)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res = context.BitwiseOr(n, context.BitwiseNot(m));
+
+ if (ShouldSetFlags(context))
+ {
+ EmitNZFlagsCheck(context, res);
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Pkh(ArmEmitterContext context)
+ {
+ OpCode32AluRsImm op = (OpCode32AluRsImm)context.CurrOp;
+
+ Operand n = GetAluN(context);
+ Operand m = GetAluM(context);
+
+ Operand res;
+
+ bool tbform = op.ShiftType == ShiftType.Asr;
+ if (tbform)
+ {
+ res = context.BitwiseOr(context.BitwiseAnd(n, Const(0xFFFF0000)), context.BitwiseAnd(m, Const(0xFFFF)));
+ }
+ else
+ {
+ res = context.BitwiseOr(context.BitwiseAnd(m, Const(0xFFFF0000)), context.BitwiseAnd(n, Const(0xFFFF)));
+ }
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rbit(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = EmitReverseBits32Op(context, m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static void Rev(ArmEmitterContext context)
+ {
+ Operand m = GetAluM(context);
+
+ Operand res = context.ByteSwap(m);
+
+ EmitAluStore(context, res);
+ }
+
+ public static
void Rev16(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBytes16_32Op(context, m); + + EmitAluStore(context, res); + } + + public static void Revsh(ArmEmitterContext context) + { + Operand m = GetAluM(context); + + Operand res = EmitReverseBytes16_32Op(context, m); + + EmitAluStore(context, context.SignExtend16(OperandType.I32, res)); + } + + public static void Rsc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(m, n); + + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + res = context.Subtract(res, borrow); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + + EmitSbcsCCheck(context, m, n); + EmitSubsVCheck(context, m, n, res); + } + + EmitAluStore(context, res); + } + + public static void Rsb(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(m, n); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, m, res); + EmitSubsVCheck(context, m, n, res); + } + + EmitAluStore(context, res); + } + + public static void Sadd8(ArmEmitterContext context) + { + EmitAddSub8(context, add: true, unsigned: false); + } + + public static void Sbc(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + Operand borrow = context.BitwiseExclusiveOr(GetFlag(PState.CFlag), Const(1)); + + res = context.Subtract(res, borrow); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + + EmitSbcsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void Sbfx(ArmEmitterContext context) + { + IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp; + + var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width. 
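+ // Example: SBFX with lsb = 4 and width = 8 decodes to op.Lsb = 4 and
+ // op.Msb = 7 (width - 1), giving msb = 11; shifting left by 31 - 11 = 20
+ // places the field's top bit at bit 31, and the arithmetic right shift by
+ // 31 - 7 = 24 then sign-extends the field into the low 8 bits.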
+ + Operand n = GetIntA32(context, op.Rn); + Operand res = context.ShiftRightSI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); + + SetIntA32(context, op.Rd, res); + } + + public static void Sdiv(ArmEmitterContext context) + { + EmitDiv(context, unsigned: false); + } + + public static void Sel(ArmEmitterContext context) + { + IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + Operand ge0 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE0Flag))); + Operand ge1 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE1Flag))); + Operand ge2 = context.ZeroExtend8(OperandType.I32, context.Negate(GetFlag(PState.GE2Flag))); + Operand ge3 = context.Negate(GetFlag(PState.GE3Flag)); + + Operand mask = context.BitwiseOr(ge0, context.ShiftLeft(ge1, Const(8))); + mask = context.BitwiseOr(mask, context.ShiftLeft(ge2, Const(16))); + mask = context.BitwiseOr(mask, context.ShiftLeft(ge3, Const(24))); + + Operand res = context.BitwiseOr(context.BitwiseAnd(n, mask), context.BitwiseAnd(m, context.BitwiseNot(mask))); + + SetIntA32(context, op.Rd, res); + } + + public static void Shadd8(ArmEmitterContext context) + { + EmitHadd8(context, unsigned: false); + } + + public static void Shsub8(ArmEmitterContext context) + { + EmitHsub8(context, unsigned: false); + } + + public static void Ssat(ArmEmitterContext context) + { + OpCode32Sat op = (OpCode32Sat)context.CurrOp; + + EmitSat(context, -(1 << op.SatImm), (1 << op.SatImm) - 1); + } + + public static void Ssat16(ArmEmitterContext context) + { + OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp; + + EmitSat16(context, -(1 << op.SatImm), (1 << op.SatImm) - 1); + } + + public static void Ssub8(ArmEmitterContext context) + { + EmitAddSub8(context, add: false, unsigned: false); + } + + public static void Sub(ArmEmitterContext context) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context, setCarry: false); + + Operand res = context.Subtract(n, m); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + + EmitSubsCCheck(context, n, res); + EmitSubsVCheck(context, n, m, res); + } + + EmitAluStore(context, res); + } + + public static void Sxtb(ArmEmitterContext context) + { + EmitSignExtend(context, true, 8); + } + + public static void Sxtb16(ArmEmitterContext context) + { + EmitExtend16(context, true); + } + + public static void Sxth(ArmEmitterContext context) + { + EmitSignExtend(context, true, 16); + } + + public static void Teq(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseExclusiveOr(n, m); + + EmitNZFlagsCheck(context, res); + } + + public static void Tst(ArmEmitterContext context) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + Operand res = context.BitwiseAnd(n, m); + EmitNZFlagsCheck(context, res); + } + + public static void Uadd8(ArmEmitterContext context) + { + EmitAddSub8(context, add: true, unsigned: true); + } + + public static void Ubfx(ArmEmitterContext context) + { + IOpCode32AluBf op = (IOpCode32AluBf)context.CurrOp; + + var msb = op.Lsb + op.Msb; // For this instruction, the msb is actually a width. 
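+ // Same shift pair as SBFX above, but the logical right shift clears the
+ // bits above the field instead of sign-filling them (zero extension).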
+ + Operand n = GetIntA32(context, op.Rn); + Operand res = context.ShiftRightUI(context.ShiftLeft(n, Const(31 - msb)), Const(31 - op.Msb)); + + SetIntA32(context, op.Rd, res); + } + + public static void Udiv(ArmEmitterContext context) + { + EmitDiv(context, unsigned: true); + } + + public static void Uhadd8(ArmEmitterContext context) + { + EmitHadd8(context, unsigned: true); + } + + public static void Uhsub8(ArmEmitterContext context) + { + EmitHsub8(context, unsigned: true); + } + + public static void Usat(ArmEmitterContext context) + { + OpCode32Sat op = (OpCode32Sat)context.CurrOp; + + EmitSat(context, 0, op.SatImm == 32 ? (int)(~0) : (1 << op.SatImm) - 1); + } + + public static void Usat16(ArmEmitterContext context) + { + OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp; + + EmitSat16(context, 0, (1 << op.SatImm) - 1); + } + + public static void Usub8(ArmEmitterContext context) + { + EmitAddSub8(context, add: false, unsigned: true); + } + + public static void Uxtb(ArmEmitterContext context) + { + EmitSignExtend(context, false, 8); + } + + public static void Uxtb16(ArmEmitterContext context) + { + EmitExtend16(context, false); + } + + public static void Uxth(ArmEmitterContext context) + { + EmitSignExtend(context, false, 16); + } + + private static void EmitSignExtend(ArmEmitterContext context, bool signed, int bits) + { + IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp; + + Operand m = GetAluM(context); + Operand res; + + if (op.RotateBits == 0) + { + res = m; + } + else + { + Operand rotate = Const(op.RotateBits); + res = context.RotateRight(m, rotate); + } + + switch (bits) + { + case 8: + res = (signed) ? context.SignExtend8(OperandType.I32, res) : context.ZeroExtend8(OperandType.I32, res); + break; + case 16: + res = (signed) ? context.SignExtend16(OperandType.I32, res) : context.ZeroExtend16(OperandType.I32, res); + break; + } + + if (op.Add) + { + res = context.Add(res, GetAluN(context)); + } + + EmitAluStore(context, res); + } + + private static void EmitExtend16(ArmEmitterContext context, bool signed) + { + IOpCode32AluUx op = (IOpCode32AluUx)context.CurrOp; + + Operand m = GetAluM(context); + Operand res; + + if (op.RotateBits == 0) + { + res = m; + } + else + { + Operand rotate = Const(op.RotateBits); + res = context.RotateRight(m, rotate); + } + + Operand low16, high16; + if (signed) + { + low16 = context.SignExtend8(OperandType.I32, res); + high16 = context.SignExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16))); + } + else + { + low16 = context.ZeroExtend8(OperandType.I32, res); + high16 = context.ZeroExtend8(OperandType.I32, context.ShiftRightUI(res, Const(16))); + } + + if (op.Add) + { + Operand n = GetAluN(context); + Operand lowAdd, highAdd; + if (signed) + { + lowAdd = context.SignExtend16(OperandType.I32, n); + highAdd = context.SignExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); + } + else + { + lowAdd = context.ZeroExtend16(OperandType.I32, n); + highAdd = context.ZeroExtend16(OperandType.I32, context.ShiftRightUI(n, Const(16))); + } + + low16 = context.Add(low16, lowAdd); + high16 = context.Add(high16, highAdd); + } + + res = context.BitwiseOr( + context.ZeroExtend16(OperandType.I32, low16), + context.ShiftLeft(context.ZeroExtend16(OperandType.I32, high16), Const(16))); + + EmitAluStore(context, res); + } + + private static void EmitDiv(ArmEmitterContext context, bool unsigned) + { + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand zero = Const(m.Type, 0); + + Operand divisorIsZero = context.ICompareEqual(m, 
zero);
+
+ Operand lblBadDiv = Label();
+ Operand lblEnd = Label();
+
+ context.BranchIfTrue(lblBadDiv, divisorIsZero);
+
+ if (!unsigned)
+ {
+ // ARM64 behaviour: If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow).
+ // TODO: tests to ensure A32 works the same
+
+ Operand intMin = Const(int.MinValue);
+ Operand minus1 = Const(-1);
+
+ Operand nIsIntMin = context.ICompareEqual(n, intMin);
+ Operand mIsMinus1 = context.ICompareEqual(m, minus1);
+
+ Operand lblGoodDiv = Label();
+
+ context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1));
+
+ EmitAluStore(context, intMin);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblGoodDiv);
+ }
+
+ Operand res = unsigned
+ ? context.DivideUI(n, m)
+ : context.Divide(n, m);
+
+ EmitAluStore(context, res);
+
+ context.Branch(lblEnd);
+
+ context.MarkLabel(lblBadDiv);
+
+ EmitAluStore(context, zero);
+
+ context.MarkLabel(lblEnd);
+ }
+
+ private static void EmitAddSub8(ArmEmitterContext context, bool add, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand n = GetIntA32(context, op.Rn);
+ Operand m = GetIntA32(context, op.Rm);
+
+ Operand res = Const(0);
+
+ for (int byteSel = 0; byteSel < 4; byteSel++)
+ {
+ Operand shift = Const(byteSel * 8);
+
+ Operand nByte = context.ShiftRightUI(n, shift);
+ Operand mByte = context.ShiftRightUI(m, shift);
+
+ nByte = unsigned ? context.ZeroExtend8(OperandType.I32, nByte) : context.SignExtend8(OperandType.I32, nByte);
+ mByte = unsigned ? context.ZeroExtend8(OperandType.I32, mByte) : context.SignExtend8(OperandType.I32, mByte);
+
+ Operand resByte = add ? context.Add(nByte, mByte) : context.Subtract(nByte, mByte);
+
+ res = context.BitwiseOr(res, context.ShiftLeft(context.ZeroExtend8(OperandType.I32, resByte), shift));
+
+ SetFlag(context, PState.GE0Flag + byteSel, unsigned && add
+ ? context.ShiftRightUI(resByte, Const(8))
+ : context.ShiftRightUI(context.BitwiseNot(resByte), Const(31)));
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitHadd8(ArmEmitterContext context, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand m = GetIntA32(context, op.Rm);
+ Operand n = GetIntA32(context, op.Rn);
+
+ Operand xor, res, carry;
+
+ // This relies on the equality x+y == ((x&y) << 1) + (x^y).
+ // Note that x^y always contains the LSB of the result.
+ // Since we want to calculate (x+y)/2, we can instead calculate (x&y) + ((x^y)>>1).
+ // We mask by 0x7F to remove the LSB so that it doesn't leak into the field below.
+
+ res = context.BitwiseAnd(m, n);
+ carry = context.BitwiseExclusiveOr(m, n);
+ xor = context.ShiftRightUI(carry, Const(1));
+ xor = context.BitwiseAnd(xor, Const(0x7F7F7F7Fu));
+ res = context.Add(res, xor);
+
+ if (!unsigned)
+ {
+ // Propagates the sign bit from (x^y)>>1 upwards by one.
+ carry = context.BitwiseAnd(carry, Const(0x80808080u));
+ res = context.BitwiseExclusiveOr(res, carry);
+ }
+
+ SetIntA32(context, op.Rd, res);
+ }
+
+ private static void EmitHsub8(ArmEmitterContext context, bool unsigned)
+ {
+ IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp;
+
+ Operand m = GetIntA32(context, op.Rm);
+ Operand n = GetIntA32(context, op.Rn);
+ Operand left, right, carry, res;
+
+ // This relies on the equality x-y == (x^y) - (((x^y)&y) << 1).
+ // Note that x^y always contains the LSB of the result.
+ // Since we want to calculate (x-y)/2, we can instead calculate ((x^y)>>1) - ((x^y)&y).
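+ // Worked example (one byte): x = 2, y = 1 gives x^y = 3 and (x^y)&y = 1,
+ // so x - y = 3 - (1 << 1) = 1 and the halved result is (3 >> 1) - 1 = 0,
+ // the truncated value of (2 - 1) / 2.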
+ + carry = context.BitwiseExclusiveOr(m, n); + left = context.ShiftRightUI(carry, Const(1)); + right = context.BitwiseAnd(carry, m); + + // We must now perform a partitioned subtraction. + // We can do this because minuend contains 7 bit fields. + // We use the extra bit in minuend as a bit to borrow from; we set this bit. + // We invert this bit at the end as this tells us if that bit was borrowed from. + + res = context.BitwiseOr(left, Const(0x80808080)); + res = context.Subtract(res, right); + res = context.BitwiseExclusiveOr(res, Const(0x80808080)); + + if (!unsigned) + { + // We then sign extend the result into this bit. + carry = context.BitwiseAnd(carry, Const(0x80808080)); + res = context.BitwiseExclusiveOr(res, carry); + } + + SetIntA32(context, op.Rd, res); + } + + private static void EmitSat(ArmEmitterContext context, int intMin, int intMax) + { + OpCode32Sat op = (OpCode32Sat)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + + int shift = DecodeImmShift(op.ShiftType, op.Imm5); + + switch (op.ShiftType) + { + case ShiftType.Lsl: + if (shift == 32) + { + n = Const(0); + } + else + { + n = context.ShiftLeft(n, Const(shift)); + } + break; + case ShiftType.Asr: + if (shift == 32) + { + n = context.ShiftRightSI(n, Const(31)); + } + else + { + n = context.ShiftRightSI(n, Const(shift)); + } + break; + } + + Operand lblCheckLtIntMin = Label(); + Operand lblNoSat = Label(); + Operand lblEnd = Label(); + + context.BranchIfFalse(lblCheckLtIntMin, context.ICompareGreater(n, Const(intMax))); + + SetFlag(context, PState.QFlag, Const(1)); + SetIntA32(context, op.Rd, Const(intMax)); + context.Branch(lblEnd); + + context.MarkLabel(lblCheckLtIntMin); + context.BranchIfFalse(lblNoSat, context.ICompareLess(n, Const(intMin))); + + SetFlag(context, PState.QFlag, Const(1)); + SetIntA32(context, op.Rd, Const(intMin)); + context.Branch(lblEnd); + + context.MarkLabel(lblNoSat); + + SetIntA32(context, op.Rd, n); + + context.MarkLabel(lblEnd); + } + + private static void EmitSat16(ArmEmitterContext context, int intMin, int intMax) + { + OpCode32Sat16 op = (OpCode32Sat16)context.CurrOp; + + void SetD(int part, Operand value) + { + if (part == 0) + { + SetIntA32(context, op.Rd, context.ZeroExtend16(OperandType.I32, value)); + } + else + { + SetIntA32(context, op.Rd, context.BitwiseOr(GetIntA32(context, op.Rd), context.ShiftLeft(value, Const(16)))); + } + } + + Operand n = GetIntA32(context, op.Rn); + + Operand nLow = context.SignExtend16(OperandType.I32, n); + Operand nHigh = context.ShiftRightSI(n, Const(16)); + + for (int part = 0; part < 2; part++) + { + Operand nPart = part == 0 ? 
nLow : nHigh; + + Operand lblCheckLtIntMin = Label(); + Operand lblNoSat = Label(); + Operand lblEnd = Label(); + + context.BranchIfFalse(lblCheckLtIntMin, context.ICompareGreater(nPart, Const(intMax))); + + SetFlag(context, PState.QFlag, Const(1)); + SetD(part, Const(intMax)); + context.Branch(lblEnd); + + context.MarkLabel(lblCheckLtIntMin); + context.BranchIfFalse(lblNoSat, context.ICompareLess(nPart, Const(intMin))); + + SetFlag(context, PState.QFlag, Const(1)); + SetD(part, Const(intMin)); + context.Branch(lblEnd); + + context.MarkLabel(lblNoSat); + + SetD(part, nPart); + + context.MarkLabel(lblEnd); + } + } + + private static void EmitAluStore(ArmEmitterContext context, Operand value) + { + IOpCode32Alu op = (IOpCode32Alu)context.CurrOp; + + EmitGenericAluStoreA32(context, op.Rd, ShouldSetFlags(context), value); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitAluHelper.cs b/src/ARMeilleure/Instructions/InstEmitAluHelper.cs new file mode 100644 index 00000000..994878ad --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitAluHelper.cs @@ -0,0 +1,613 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitAluHelper + { + public static bool ShouldSetFlags(ArmEmitterContext context) + { + IOpCode32HasSetFlags op = (IOpCode32HasSetFlags)context.CurrOp; + + if (op.SetFlags == null) + { + return !context.IsInIfThenBlock; + } + + return op.SetFlags.Value; + } + + public static void EmitNZFlagsCheck(ArmEmitterContext context, Operand d) + { + SetFlag(context, PState.NFlag, context.ICompareLess (d, Const(d.Type, 0))); + SetFlag(context, PState.ZFlag, context.ICompareEqual(d, Const(d.Type, 0))); + } + + public static void EmitAdcsCCheck(ArmEmitterContext context, Operand n, Operand d) + { + // C = (Rd == Rn && CIn) || Rd < Rn + Operand cIn = GetFlag(PState.CFlag); + + Operand cOut = context.BitwiseAnd(context.ICompareEqual(d, n), cIn); + + cOut = context.BitwiseOr(cOut, context.ICompareLessUI(d, n)); + + SetFlag(context, PState.CFlag, cOut); + } + + public static void EmitAddsCCheck(ArmEmitterContext context, Operand n, Operand d) + { + // C = Rd < Rn + SetFlag(context, PState.CFlag, context.ICompareLessUI(d, n)); + } + + public static void EmitAddsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d) + { + // V = (Rd ^ Rn) & ~(Rn ^ Rm) < 0 + Operand vOut = context.BitwiseExclusiveOr(d, n); + + vOut = context.BitwiseAnd(vOut, context.BitwiseNot(context.BitwiseExclusiveOr(n, m))); + + vOut = context.ICompareLess(vOut, Const(vOut.Type, 0)); + + SetFlag(context, PState.VFlag, vOut); + } + + public static void EmitSbcsCCheck(ArmEmitterContext context, Operand n, Operand m) + { + // C = (Rn == Rm && CIn) || Rn > Rm + Operand cIn = GetFlag(PState.CFlag); + + Operand cOut = context.BitwiseAnd(context.ICompareEqual(n, m), cIn); + + cOut = context.BitwiseOr(cOut, context.ICompareGreaterUI(n, m)); + + SetFlag(context, PState.CFlag, cOut); + } + + public static void EmitSubsCCheck(ArmEmitterContext context, Operand n, Operand m) + { + // C = Rn >= Rm + SetFlag(context, PState.CFlag, context.ICompareGreaterOrEqualUI(n, m)); + } + + public static void EmitSubsVCheck(ArmEmitterContext context, Operand n, Operand m, Operand d) + { + // V = (Rd ^ Rn) & (Rn ^ Rm) < 0 + 
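+ // Overflow on subtraction is only possible when the operands' signs
+ // differ (Rn ^ Rm is negative) and the result's sign differs from Rn's
+ // (Rd ^ Rn is negative); the AND of the two is negative exactly then.
+ 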
Operand vOut = context.BitwiseExclusiveOr(d, n);
+
+ vOut = context.BitwiseAnd(vOut, context.BitwiseExclusiveOr(n, m));
+
+ vOut = context.ICompareLess(vOut, Const(vOut.Type, 0));
+
+ SetFlag(context, PState.VFlag, vOut);
+ }
+
+ public static Operand EmitReverseBits32Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I32);
+
+ Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaaaaaaau)), Const(1)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x55555555u)), Const(1)));
+
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccccccccu)), Const(2)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x33333333u)), Const(2)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xf0f0f0f0u)), Const(4)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x0f0f0f0fu)), Const(4)));
+ val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xff00ff00u)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(val, Const(0x00ff00ffu)), Const(8)));
+
+ return context.BitwiseOr(context.ShiftRightUI(val, Const(16)), context.ShiftLeft(val, Const(16)));
+ }
+
+ public static Operand EmitReverseBytes16_64Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I64);
+
+ return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xff00ff00ff00ff00ul)), Const(8)),
+ context.ShiftLeft(context.BitwiseAnd(op, Const(0x00ff00ff00ff00fful)), Const(8)));
+ }
+
+ public static Operand EmitReverseBytes16_32Op(ArmEmitterContext context, Operand op)
+ {
+ Debug.Assert(op.Type == OperandType.I32);
+
+ Operand val = EmitReverseBytes16_64Op(context, context.ZeroExtend32(OperandType.I64, op));
+
+ return context.ConvertI64ToI32(val);
+ }
+
+ private static void EmitAluWritePc(ArmEmitterContext context, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.I32);
+
+ if (((OpCode32)context.CurrOp).IsThumb)
+ {
+ bool isReturn = IsA32Return(context);
+ if (!isReturn)
+ {
+ context.StoreToContext();
+ }
+
+ InstEmitFlowHelper.EmitVirtualJump(context, value, isReturn);
+ }
+ else
+ {
+ EmitBxWritePc(context, value);
+ }
+ }
+
+ public static void EmitGenericAluStoreA32(ArmEmitterContext context, int rd, bool setFlags, Operand value)
+ {
+ Debug.Assert(value.Type == OperandType.I32);
+
+ if (rd == RegisterAlias.Aarch32Pc)
+ {
+ if (setFlags)
+ {
+ // TODO: Load SPSR etc.
+
+ EmitBxWritePc(context, value);
+ }
+ else
+ {
+ EmitAluWritePc(context, value);
+ }
+ }
+ else
+ {
+ SetIntA32(context, rd, value);
+ }
+ }
+
+ public static Operand GetAluN(ArmEmitterContext context)
+ {
+ if (context.CurrOp is IOpCodeAlu op)
+ {
+ if (op.DataOp == DataOp.Logical || op is IOpCodeAluRs)
+ {
+ return GetIntOrZR(context, op.Rn);
+ }
+ else
+ {
+ return GetIntOrSP(context, op.Rn);
+ }
+ }
+ else if (context.CurrOp is IOpCode32Alu op32)
+ {
+ return GetIntA32(context, op32.Rn);
+ }
+ else
+ {
+ throw InvalidOpCodeType(context.CurrOp);
+ }
+ }
+
+ public static Operand GetAluM(ArmEmitterContext context, bool setCarry = true)
+ {
+ switch (context.CurrOp)
+ {
+ // ARM32.
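+ // A32 modified immediates are an 8-bit value rotated right by an even
+ // amount; when the rotation is non-zero, the shifter carry-out is bit 31
+ // of the rotated value, which is what the IsRotated case below models.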
+ case IOpCode32AluImm op: + { + if (ShouldSetFlags(context) && op.IsRotated && setCarry) + { + SetFlag(context, PState.CFlag, Const((uint)op.Immediate >> 31)); + } + + return Const(op.Immediate); + } + + case IOpCode32AluImm16 op: return Const(op.Immediate); + + case IOpCode32AluRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + case IOpCode32AluRsReg op: return GetMShiftedByReg(context, op, setCarry); + + case IOpCode32AluReg op: return GetIntA32(context, op.Rm); + + // ARM64. + case IOpCodeAluImm op: + { + if (op.GetOperandType() == OperandType.I32) + { + return Const((int)op.Immediate); + } + else + { + return Const(op.Immediate); + } + } + + case IOpCodeAluRs op: + { + Operand value = GetIntOrZR(context, op.Rm); + + switch (op.ShiftType) + { + case ShiftType.Lsl: value = context.ShiftLeft (value, Const(op.Shift)); break; + case ShiftType.Lsr: value = context.ShiftRightUI(value, Const(op.Shift)); break; + case ShiftType.Asr: value = context.ShiftRightSI(value, Const(op.Shift)); break; + case ShiftType.Ror: value = context.RotateRight (value, Const(op.Shift)); break; + } + + return value; + } + + case IOpCodeAluRx op: + { + Operand value = GetExtendedM(context, op.Rm, op.IntType); + + value = context.ShiftLeft(value, Const(op.Shift)); + + return value; + } + + default: throw InvalidOpCodeType(context.CurrOp); + } + } + + private static Exception InvalidOpCodeType(OpCode opCode) + { + return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? "null"}\"."); + } + + // ARM32 helpers. + public static Operand GetMShiftedByImmediate(ArmEmitterContext context, IOpCode32AluRsImm op, bool setCarry) + { + Operand m = GetIntA32(context, op.Rm); + + int shift = op.Immediate; + + if (shift == 0) + { + switch (op.ShiftType) + { + case ShiftType.Lsr: shift = 32; break; + case ShiftType.Asr: shift = 32; break; + case ShiftType.Ror: shift = 1; break; + } + } + + if (shift != 0) + { + setCarry &= ShouldSetFlags(context); + + switch (op.ShiftType) + { + case ShiftType.Lsl: m = GetLslC(context, m, setCarry, shift); break; + case ShiftType.Lsr: m = GetLsrC(context, m, setCarry, shift); break; + case ShiftType.Asr: m = GetAsrC(context, m, setCarry, shift); break; + case ShiftType.Ror: + if (op.Immediate != 0) + { + m = GetRorC(context, m, setCarry, shift); + } + else + { + m = GetRrxC(context, m, setCarry); + } + break; + } + } + + return m; + } + + public static int DecodeImmShift(ShiftType shiftType, int shift) + { + if (shift == 0) + { + switch (shiftType) + { + case ShiftType.Lsr: shift = 32; break; + case ShiftType.Asr: shift = 32; break; + case ShiftType.Ror: shift = 1; break; + } + } + + return shift; + } + + public static Operand GetMShiftedByReg(ArmEmitterContext context, IOpCode32AluRsReg op, bool setCarry) + { + Operand m = GetIntA32(context, op.Rm); + Operand s = context.ZeroExtend8(OperandType.I32, GetIntA32(context, op.Rs)); + Operand shiftIsZero = context.ICompareEqual(s, Const(0)); + + Operand zeroResult = m; + Operand shiftResult = m; + + setCarry &= ShouldSetFlags(context); + + switch (op.ShiftType) + { + case ShiftType.Lsl: shiftResult = EmitLslC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Lsr: shiftResult = EmitLsrC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Asr: shiftResult = EmitAsrC(context, m, setCarry, s, shiftIsZero); break; + case ShiftType.Ror: shiftResult = EmitRorC(context, m, setCarry, s, shiftIsZero); break; + } + + return context.ConditionalSelect(shiftIsZero, zeroResult, shiftResult); + } + + 
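+ // Register-controlled shifts use only the bottom byte of Rs, and a shift
+ // amount of zero leaves both the value and the carry flag unchanged,
+ // which is why GetMShiftedByReg above falls back to the unshifted value
+ // when s == 0.
+ 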
public static void EmitIfHelper(ArmEmitterContext context, Operand boolValue, Action action, bool expected = true) + { + Debug.Assert(boolValue.Type == OperandType.I32); + + Operand endLabel = Label(); + + if (expected) + { + context.BranchIfFalse(endLabel, boolValue); + } + else + { + context.BranchIfTrue(endLabel, boolValue); + } + + action(); + + context.MarkLabel(endLabel); + } + + public static Operand EmitLslC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) + { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32)); + Operand result = context.ShiftLeft(m, shift); + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(Const(32), shift)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut); + + SetFlag(context, PState.CFlag, cOut); + }, false); + } + + return context.ConditionalSelect(shiftLarge, Const(0), result); + } + + public static Operand GetLslC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + + if ((uint)shift > 32) + { + return GetShiftByMoreThan32(context, setCarry); + } + else if (shift == 32) + { + if (setCarry) + { + SetCarryMLsb(context, m); + } + + return Const(0); + } + else + { + if (setCarry) + { + Operand cOut = context.ShiftRightUI(m, Const(32 - shift)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + } + + return context.ShiftLeft(m, Const(shift)); + } + } + + public static Operand EmitLsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) + { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand shiftLarge = context.ICompareGreaterOrEqual(shift, Const(32)); + Operand result = context.ShiftRightUI(m, shift); + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1))); + + cOut = context.BitwiseAnd(cOut, Const(1)); + cOut = context.ConditionalSelect(context.ICompareGreater(shift, Const(32)), Const(0), cOut); + + SetFlag(context, PState.CFlag, cOut); + }, false); + } + + return context.ConditionalSelect(shiftLarge, Const(0), result); + } + + public static Operand GetLsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + + if ((uint)shift > 32) + { + return GetShiftByMoreThan32(context, setCarry); + } + else if (shift == 32) + { + if (setCarry) + { + SetCarryMMsb(context, m); + } + + return Const(0); + } + else + { + if (setCarry) + { + SetCarryMShrOut(context, m, shift); + } + + return context.ShiftRightUI(m, Const(shift)); + } + } + + private static Operand GetShiftByMoreThan32(ArmEmitterContext context, bool setCarry) + { + if (setCarry) + { + SetFlag(context, PState.CFlag, Const(0)); + } + + return Const(0); + } + + public static Operand EmitAsrC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) + { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + Operand l32Result; + Operand ge32Result; + + Operand less32 = context.ICompareLess(shift, Const(32)); + + ge32Result = 
context.ShiftRightSI(m, Const(31)); + + if (setCarry) + { + EmitIfHelper(context, context.BitwiseOr(less32, shiftIsZero), () => + { + SetCarryMLsb(context, ge32Result); + }, false); + } + + l32Result = context.ShiftRightSI(m, shift); + if (setCarry) + { + EmitIfHelper(context, context.BitwiseAnd(less32, context.BitwiseNot(shiftIsZero)), () => + { + Operand cOut = context.ShiftRightUI(m, context.Subtract(shift, Const(1))); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + }); + } + + return context.ConditionalSelect(less32, l32Result, ge32Result); + } + + public static Operand GetAsrC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + + if ((uint)shift >= 32) + { + m = context.ShiftRightSI(m, Const(31)); + + if (setCarry) + { + SetCarryMLsb(context, m); + } + + return m; + } + else + { + if (setCarry) + { + SetCarryMShrOut(context, m, shift); + } + + return context.ShiftRightSI(m, Const(shift)); + } + } + + public static Operand EmitRorC(ArmEmitterContext context, Operand m, bool setCarry, Operand shift, Operand shiftIsZero) + { + Debug.Assert(m.Type == OperandType.I32 && shift.Type == OperandType.I32 && shiftIsZero.Type == OperandType.I32); + + shift = context.BitwiseAnd(shift, Const(0x1f)); + m = context.RotateRight(m, shift); + + if (setCarry) + { + EmitIfHelper(context, shiftIsZero, () => + { + SetCarryMMsb(context, m); + }, false); + } + + return m; + } + + public static Operand GetRorC(ArmEmitterContext context, Operand m, bool setCarry, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + + shift &= 0x1f; + + m = context.RotateRight(m, Const(shift)); + + if (setCarry) + { + SetCarryMMsb(context, m); + } + + return m; + } + + public static Operand GetRrxC(ArmEmitterContext context, Operand m, bool setCarry) + { + Debug.Assert(m.Type == OperandType.I32); + + // Rotate right by 1 with carry. 
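+ // Example: with C = 1 and m = 0x00000003, the result is 0x80000001 and
+ // the new carry is the old bit 0 (1).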
+ Operand cIn = context.Copy(GetFlag(PState.CFlag)); + + if (setCarry) + { + SetCarryMLsb(context, m); + } + + m = context.ShiftRightUI(m, Const(1)); + + m = context.BitwiseOr(m, context.ShiftLeft(cIn, Const(31))); + + return m; + } + + private static void SetCarryMLsb(ArmEmitterContext context, Operand m) + { + Debug.Assert(m.Type == OperandType.I32); + + SetFlag(context, PState.CFlag, context.BitwiseAnd(m, Const(1))); + } + + private static void SetCarryMMsb(ArmEmitterContext context, Operand m) + { + Debug.Assert(m.Type == OperandType.I32); + + SetFlag(context, PState.CFlag, context.ShiftRightUI(m, Const(31))); + } + + private static void SetCarryMShrOut(ArmEmitterContext context, Operand m, int shift) + { + Debug.Assert(m.Type == OperandType.I32); + + Operand cOut = context.ShiftRightUI(m, Const(shift - 1)); + + cOut = context.BitwiseAnd(cOut, Const(1)); + + SetFlag(context, PState.CFlag, cOut); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitBfm.cs b/src/ARMeilleure/Instructions/InstEmitBfm.cs new file mode 100644 index 00000000..46a7dddd --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitBfm.cs @@ -0,0 +1,196 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Bfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand d = GetIntOrZR(context, op.Rd); + Operand n = GetIntOrZR(context, op.Rn); + + Operand res; + + if (op.Pos < op.Shift) + { + // BFI. + int shift = op.GetBitsCount() - op.Shift; + + int width = op.Pos + 1; + + long mask = (long)(ulong.MaxValue >> (64 - width)); + + res = context.ShiftLeft(context.BitwiseAnd(n, Const(n.Type, mask)), Const(shift)); + + res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~(mask << shift)))); + } + else + { + // BFXIL. 
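+ // BFXIL extracts a <width>-bit field starting at bit <shift> of Rn and
+ // inserts it at the bottom of Rd, leaving the upper bits of Rd intact.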
+ int shift = op.Shift; + + int width = op.Pos - shift + 1; + + long mask = (long)(ulong.MaxValue >> (64 - width)); + + res = context.BitwiseAnd(context.ShiftRightUI(n, Const(shift)), Const(n.Type, mask)); + + res = context.BitwiseOr(res, context.BitwiseAnd(d, Const(d.Type, ~mask))); + } + + SetIntOrZR(context, op.Rd, res); + } + + public static void Sbfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + int bitsCount = op.GetBitsCount(); + + if (op.Pos + 1 == bitsCount) + { + EmitSbfmShift(context); + } + else if (op.Pos < op.Shift) + { + EmitSbfiz(context); + } + else if (op.Pos == 7 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend8(n.Type, n)); + } + else if (op.Pos == 15 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend16(n.Type, n)); + } + else if (op.Pos == 31 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.SignExtend32(n.Type, n)); + } + else + { + Operand res = GetIntOrZR(context, op.Rn); + + res = context.ShiftLeft (res, Const(bitsCount - 1 - op.Pos)); + res = context.ShiftRightSI(res, Const(bitsCount - 1)); + res = context.BitwiseAnd (res, Const(res.Type, ~op.TMask)); + + Operand n2 = GetBfmN(context); + + SetIntOrZR(context, op.Rd, context.BitwiseOr(res, n2)); + } + } + + public static void Ubfm(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + if (op.Pos + 1 == op.GetBitsCount()) + { + EmitUbfmShift(context); + } + else if (op.Pos < op.Shift) + { + EmitUbfiz(context); + } + else if (op.Pos + 1 == op.Shift) + { + EmitBfmLsl(context); + } + else if (op.Pos == 7 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xff))); + } + else if (op.Pos == 15 && op.Shift == 0) + { + Operand n = GetIntOrZR(context, op.Rn); + + SetIntOrZR(context, op.Rd, context.BitwiseAnd(n, Const(n.Type, 0xffff))); + } + else + { + SetIntOrZR(context, op.Rd, GetBfmN(context)); + } + } + + private static void EmitSbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: true); + private static void EmitUbfiz(ArmEmitterContext context) => EmitBfiz(context, signed: false); + + private static void EmitBfiz(ArmEmitterContext context, bool signed) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + int width = op.Pos + 1; + + Operand res = GetIntOrZR(context, op.Rn); + + res = context.ShiftLeft(res, Const(op.GetBitsCount() - width)); + + res = signed + ? context.ShiftRightSI(res, Const(op.Shift - width)) + : context.ShiftRightUI(res, Const(op.Shift - width)); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitSbfmShift(ArmEmitterContext context) + { + EmitBfmShift(context, signed: true); + } + + private static void EmitUbfmShift(ArmEmitterContext context) + { + EmitBfmShift(context, signed: false); + } + + private static void EmitBfmShift(ArmEmitterContext context, bool signed) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + res = signed + ? 
context.ShiftRightSI(res, Const(op.Shift)) + : context.ShiftRightUI(res, Const(op.Shift)); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitBfmLsl(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + int shift = op.GetBitsCount() - op.Shift; + + SetIntOrZR(context, op.Rd, context.ShiftLeft(res, Const(shift))); + } + + private static Operand GetBfmN(ArmEmitterContext context) + { + OpCodeBfm op = (OpCodeBfm)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + long mask = op.WMask & op.TMask; + + return context.BitwiseAnd(context.RotateRight(res, Const(op.Shift)), Const(res.Type, mask)); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitCcmp.cs b/src/ARMeilleure/Instructions/InstEmitCcmp.cs new file mode 100644 index 00000000..7f0beb6c --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitCcmp.cs @@ -0,0 +1,61 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Ccmn(ArmEmitterContext context) => EmitCcmp(context, isNegated: true); + public static void Ccmp(ArmEmitterContext context) => EmitCcmp(context, isNegated: false); + + private static void EmitCcmp(ArmEmitterContext context, bool isNegated) + { + OpCodeCcmp op = (OpCodeCcmp)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + EmitCondBranch(context, lblTrue, op.Cond); + + SetFlag(context, PState.VFlag, Const((op.Nzcv >> 0) & 1)); + SetFlag(context, PState.CFlag, Const((op.Nzcv >> 1) & 1)); + SetFlag(context, PState.ZFlag, Const((op.Nzcv >> 2) & 1)); + SetFlag(context, PState.NFlag, Const((op.Nzcv >> 3) & 1)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + + if (isNegated) + { + Operand d = context.Add(n, m); + + EmitNZFlagsCheck(context, d); + + EmitAddsCCheck(context, n, d); + EmitAddsVCheck(context, n, m, d); + } + else + { + Operand d = context.Subtract(n, m); + + EmitNZFlagsCheck(context, d); + + EmitSubsCCheck(context, n, m); + EmitSubsVCheck(context, n, m, d); + } + + context.MarkLabel(lblEnd); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitCsel.cs b/src/ARMeilleure/Instructions/InstEmitCsel.cs new file mode 100644 index 00000000..926b9a9e --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitCsel.cs @@ -0,0 +1,53 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + private enum CselOperation + { + None, + Increment, + Invert, + Negate + } + + public static void Csel(ArmEmitterContext context) => EmitCsel(context, CselOperation.None); + public static void Csinc(ArmEmitterContext context) => EmitCsel(context, CselOperation.Increment); + public static void Csinv(ArmEmitterContext context) => EmitCsel(context, CselOperation.Invert); + public static void Csneg(ArmEmitterContext context) => EmitCsel(context, CselOperation.Negate); + + private static void EmitCsel(ArmEmitterContext context, CselOperation cselOp) + { + OpCodeCsel op = (OpCodeCsel)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + if (cselOp == CselOperation.Increment) + { + m = context.Add(m, Const(m.Type, 1)); + } + else if (cselOp == CselOperation.Invert) + { + m = context.BitwiseNot(m); + } + else if (cselOp == CselOperation.Negate) + { + m = context.Negate(m); + } + + Operand condTrue = GetCondTrue(context, op.Cond); + + Operand d = context.ConditionalSelect(condTrue, n, m); + + SetIntOrZR(context, op.Rd, d); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitDiv.cs b/src/ARMeilleure/Instructions/InstEmitDiv.cs new file mode 100644 index 00000000..39a5c32e --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitDiv.cs @@ -0,0 +1,67 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Sdiv(ArmEmitterContext context) => EmitDiv(context, unsigned: false); + public static void Udiv(ArmEmitterContext context) => EmitDiv(context, unsigned: true); + + private static void EmitDiv(ArmEmitterContext context, bool unsigned) + { + OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; + + // If Rm == 0, Rd = 0 (division by zero). + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand divisorIsZero = context.ICompareEqual(m, Const(m.Type, 0)); + + Operand lblBadDiv = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBadDiv, divisorIsZero); + + if (!unsigned) + { + // If Rn == INT_MIN && Rm == -1, Rd = INT_MIN (overflow). + bool is32Bits = op.RegisterSize == RegisterSize.Int32; + + Operand intMin = is32Bits ? Const(int.MinValue) : Const(long.MinValue); + Operand minus1 = is32Bits ? Const(-1) : Const(-1L); + + Operand nIsIntMin = context.ICompareEqual(n, intMin); + Operand mIsMinus1 = context.ICompareEqual(m, minus1); + + Operand lblGoodDiv = Label(); + + context.BranchIfFalse(lblGoodDiv, context.BitwiseAnd(nIsIntMin, mIsMinus1)); + + SetAluDOrZR(context, intMin); + + context.Branch(lblEnd); + + context.MarkLabel(lblGoodDiv); + } + + Operand d = unsigned + ? 
context.DivideUI(n, m) + : context.Divide (n, m); + + SetAluDOrZR(context, d); + + context.Branch(lblEnd); + + context.MarkLabel(lblBadDiv); + + SetAluDOrZR(context, Const(op.GetOperandType(), 0)); + + context.MarkLabel(lblEnd); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitException.cs b/src/ARMeilleure/Instructions/InstEmitException.cs new file mode 100644 index 00000000..0baaa87d --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitException.cs @@ -0,0 +1,55 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; + +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Brk(ArmEmitterContext context) + { + OpCodeException op = (OpCodeException)context.CurrOp; + + string name = nameof(NativeInterface.Break); + + context.StoreToContext(); + + context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id)); + + context.LoadFromContext(); + + context.Return(Const(op.Address)); + } + + public static void Svc(ArmEmitterContext context) + { + OpCodeException op = (OpCodeException)context.CurrOp; + + string name = nameof(NativeInterface.SupervisorCall); + + context.StoreToContext(); + + context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.Id)); + + context.LoadFromContext(); + + Translator.EmitSynchronization(context); + } + + public static void Und(ArmEmitterContext context) + { + OpCode op = context.CurrOp; + + string name = nameof(NativeInterface.Undefined); + + context.StoreToContext(); + + context.Call(typeof(NativeInterface).GetMethod(name), Const(op.Address), Const(op.RawOpCode)); + + context.LoadFromContext(); + + context.Return(Const(op.Address)); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitException32.cs b/src/ARMeilleure/Instructions/InstEmitException32.cs new file mode 100644 index 00000000..ec0c32bf --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitException32.cs @@ -0,0 +1,39 @@ +using ARMeilleure.Decoders; +using ARMeilleure.Translation; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Svc(ArmEmitterContext context) + { + IOpCode32Exception op = (IOpCode32Exception)context.CurrOp; + + string name = nameof(NativeInterface.SupervisorCall); + + context.StoreToContext(); + + context.Call(typeof(NativeInterface).GetMethod(name), Const(((IOpCode)op).Address), Const(op.Id)); + + context.LoadFromContext(); + + Translator.EmitSynchronization(context); + } + + public static void Trap(ArmEmitterContext context) + { + IOpCode32Exception op = (IOpCode32Exception)context.CurrOp; + + string name = nameof(NativeInterface.Break); + + context.StoreToContext(); + + context.Call(typeof(NativeInterface).GetMethod(name), Const(((IOpCode)op).Address), Const(op.Id)); + + context.LoadFromContext(); + + context.Return(Const(context.CurrOp.Address)); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitFlow.cs b/src/ARMeilleure/Instructions/InstEmitFlow.cs new file mode 100644 index 00000000..c40eb55c --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitFlow.cs @@ -0,0 +1,107 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void B(ArmEmitterContext context) + { + OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp; + + context.Branch(context.GetLabel((ulong)op.Immediate)); + } + + public static void B_Cond(ArmEmitterContext context) + { + OpCodeBImmCond op = (OpCodeBImmCond)context.CurrOp; + + EmitBranch(context, op.Cond); + } + + public static void Bl(ArmEmitterContext context) + { + OpCodeBImmAl op = (OpCodeBImmAl)context.CurrOp; + + context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4)); + + EmitCall(context, (ulong)op.Immediate); + } + + public static void Blr(ArmEmitterContext context) + { + OpCodeBReg op = (OpCodeBReg)context.CurrOp; + + Operand n = context.Copy(GetIntOrZR(context, op.Rn)); + + context.Copy(GetIntOrZR(context, RegisterAlias.Lr), Const(op.Address + 4)); + + EmitVirtualCall(context, n); + } + + public static void Br(ArmEmitterContext context) + { + OpCodeBReg op = (OpCodeBReg)context.CurrOp; + + EmitVirtualJump(context, GetIntOrZR(context, op.Rn), op.Rn == RegisterAlias.Lr); + } + + public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true); + public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false); + + private static void EmitCb(ArmEmitterContext context, bool onNotZero) + { + OpCodeBImmCmp op = (OpCodeBImmCmp)context.CurrOp; + + EmitBranch(context, GetIntOrZR(context, op.Rt), onNotZero); + } + + public static void Ret(ArmEmitterContext context) + { + OpCodeBReg op = (OpCodeBReg)context.CurrOp; + + context.Return(GetIntOrZR(context, op.Rn)); + } + + public static void Tbnz(ArmEmitterContext context) => EmitTb(context, onNotZero: 
true); + public static void Tbz(ArmEmitterContext context) => EmitTb(context, onNotZero: false); + + private static void EmitTb(ArmEmitterContext context, bool onNotZero) + { + OpCodeBImmTest op = (OpCodeBImmTest)context.CurrOp; + + Operand value = context.BitwiseAnd(GetIntOrZR(context, op.Rt), Const(1L << op.Bit)); + + EmitBranch(context, value, onNotZero); + } + + private static void EmitBranch(ArmEmitterContext context, Condition cond) + { + OpCodeBImm op = (OpCodeBImm)context.CurrOp; + + EmitCondBranch(context, context.GetLabel((ulong)op.Immediate), cond); + } + + private static void EmitBranch(ArmEmitterContext context, Operand value, bool onNotZero) + { + OpCodeBImm op = (OpCodeBImm)context.CurrOp; + + Operand lblTarget = context.GetLabel((ulong)op.Immediate); + + if (onNotZero) + { + context.BranchIfTrue(lblTarget, value); + } + else + { + context.BranchIfFalse(lblTarget, value); + } + } + } +}
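The compare-and-branch (Cbnz/Cbz) and test-and-branch (Tbnz/Tbz) emitters above reduce to simple predicates on a register value. As a minimal illustrative sketch (plain C#, not ARMeilleure IR; the names are hypothetical):

static class BranchPredicateSketch
{
    // CBNZ branches when the whole register is non-zero; CBZ when it is zero.
    public static bool CbnzTaken(long value) => value != 0;

    // TBNZ/TBZ isolate a single bit, mirroring the BitwiseAnd with
    // Const(1L << op.Bit) that EmitTb performs above.
    public static bool TbnzTaken(long value, int bit) => (value & (1L << bit)) != 0;
}

Both emitters then funnel into the same EmitBranch(context, value, onNotZero) helper, which picks BranchIfTrue or BranchIfFalse on the computed operand.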
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitFlow32.cs b/src/ARMeilleure/Instructions/InstEmitFlow32.cs new file mode 100644 index 00000000..3a7707ee --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitFlow32.cs @@ -0,0 +1,136 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void B(ArmEmitterContext context) + { + IOpCode32BImm op = (IOpCode32BImm)context.CurrOp; + + context.Branch(context.GetLabel((ulong)op.Immediate)); + } + + public static void Bl(ArmEmitterContext context) + { + Blx(context, x: false); + } + + public static void Blx(ArmEmitterContext context) + { + Blx(context, x: true); + } + + private static void Blx(ArmEmitterContext context, bool x) + { + IOpCode32BImm op = (IOpCode32BImm)context.CurrOp; + + uint pc = op.GetPc(); + + bool isThumb = ((OpCode32)context.CurrOp).IsThumb; + + uint currentPc = isThumb + ? pc | 1 + : pc - 4; + + SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); + + // If x is true, then this is a branch with link and exchange. + // In this case we need to swap the mode between Arm <-> Thumb. + if (x) + { + SetFlag(context, PState.TFlag, Const(isThumb ? 0 : 1)); + } + + EmitCall(context, (ulong)op.Immediate); + } + + public static void Blxr(ArmEmitterContext context) + { + IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; + + uint pc = op.GetPc(); + + Operand addr = context.Copy(GetIntA32(context, op.Rm)); + Operand bitOne = context.BitwiseAnd(addr, Const(1)); + + bool isThumb = ((OpCode32)context.CurrOp).IsThumb; + + uint currentPc = isThumb + ? 
(pc - 2) | 1 + : pc - 4; + + SetIntA32(context, GetBankedRegisterAlias(context.Mode, RegisterAlias.Aarch32Lr), Const(currentPc)); + + SetFlag(context, PState.TFlag, bitOne); + + EmitBxWritePc(context, addr); + } + + public static void Bx(ArmEmitterContext context) + { + IOpCode32BReg op = (IOpCode32BReg)context.CurrOp; + + EmitBxWritePc(context, GetIntA32(context, op.Rm), op.Rm); + } + + public static void Cbnz(ArmEmitterContext context) => EmitCb(context, onNotZero: true); + public static void Cbz(ArmEmitterContext context) => EmitCb(context, onNotZero: false); + + private static void EmitCb(ArmEmitterContext context, bool onNotZero) + { + OpCodeT16BImmCmp op = (OpCodeT16BImmCmp)context.CurrOp; + + Operand value = GetIntA32(context, op.Rn); + Operand lblTarget = context.GetLabel((ulong)op.Immediate); + + if (onNotZero) + { + context.BranchIfTrue(lblTarget, value); + } + else + { + context.BranchIfFalse(lblTarget, value); + } + } + + public static void It(ArmEmitterContext context) + { + OpCodeT16IfThen op = (OpCodeT16IfThen)context.CurrOp; + + context.SetIfThenBlockState(op.IfThenBlockConds); + } + + public static void Tbb(ArmEmitterContext context) => EmitTb(context, halfword: false); + public static void Tbh(ArmEmitterContext context) => EmitTb(context, halfword: true); + + private static void EmitTb(ArmEmitterContext context, bool halfword) + { + OpCodeT32Tb op = (OpCodeT32Tb)context.CurrOp; + + Operand halfwords; + + if (halfword) + { + Operand address = context.Add(GetIntA32(context, op.Rn), context.ShiftLeft(GetIntA32(context, op.Rm), Const(1))); + halfwords = InstEmitMemoryHelper.EmitReadInt(context, address, 1); + } + else + { + Operand address = context.Add(GetIntA32(context, op.Rn), GetIntA32(context, op.Rm)); + halfwords = InstEmitMemoryHelper.EmitReadIntAligned(context, address, 0); + } + + Operand targetAddress = context.Add(Const((int)op.GetPc()), context.ShiftLeft(halfwords, Const(1))); + + EmitVirtualJump(context, targetAddress, isReturn: false); + } + } +}
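Blx, Blxr and Bx above all encode Arm/Thumb interworking through bit 0 of an address: it selects the target instruction set, and the link register records the return mode the same way. A minimal sketch of that rule, assuming a plain C# model rather than emitted IR (names are illustrative):

static class InterworkSketch
{
    // Mirrors EmitBxWritePc: bit 0 of the branch target becomes the T flag,
    // and the PC is force-aligned to 2 bytes (Thumb) or 4 bytes (Arm).
    public static (bool Thumb, uint Pc) BxWritePc(uint target)
    {
        bool thumb = (target & 1) != 0;
        return (thumb, thumb ? target & ~1u : target & ~3u);
    }

    // Mirrors the LR value Blxr writes: Thumb callers record (pc - 2) | 1 so
    // a later BX LR resumes in Thumb mode; Arm callers record pc - 4.
    public static uint LinkValue(uint pc, bool isThumb) => isThumb ? (pc - 2) | 1 : pc - 4;
}

For example, BxWritePc(0x1001) selects Thumb with PC 0x1000, while BxWritePc(0x1004) selects Arm with PC 0x1004.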
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs new file mode 100644 index 00000000..6ac32908 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitFlowHelper.cs @@ -0,0 +1,240 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using ARMeilleure.Translation.PTC; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitFlowHelper + { + public static void EmitCondBranch(ArmEmitterContext context, Operand target, Condition cond) + { + if (cond != Condition.Al) + { + context.BranchIfTrue(target, GetCondTrue(context, cond)); + } + else + { + context.Branch(target); + } + } + + public static Operand GetCondTrue(ArmEmitterContext context, Condition condition) + { + Operand cmpResult = context.TryGetComparisonResult(condition); + + if (cmpResult != default) + { + return cmpResult; + } + + Operand value = Const(1); + + Operand Inverse(Operand val) + { + return context.BitwiseExclusiveOr(val, Const(1)); + } + + switch (condition) + { + case Condition.Eq: + value = GetFlag(PState.ZFlag); + break; + + case Condition.Ne: + value = Inverse(GetFlag(PState.ZFlag)); + break; + + case Condition.GeUn: + value = GetFlag(PState.CFlag); + break; + + case Condition.LtUn: + value = Inverse(GetFlag(PState.CFlag)); + break; + + case Condition.Mi: + value = GetFlag(PState.NFlag); + break; + + case Condition.Pl: + value = Inverse(GetFlag(PState.NFlag)); + break; + + case Condition.Vs: + value = GetFlag(PState.VFlag); + break; + + case Condition.Vc: + value = Inverse(GetFlag(PState.VFlag)); + break; + + case Condition.GtUn: + { + Operand c = GetFlag(PState.CFlag); + Operand z = GetFlag(PState.ZFlag); + + value = context.BitwiseAnd(c, Inverse(z)); + + break; + } + + case Condition.LeUn: + { + Operand c = GetFlag(PState.CFlag); + Operand z = GetFlag(PState.ZFlag); + + value = context.BitwiseOr(Inverse(c), z); + + break; + } + + case Condition.Ge: + { + Operand n = GetFlag(PState.NFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.ICompareEqual(n, v); + + break; + } + + case Condition.Lt: + { + Operand n = GetFlag(PState.NFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.ICompareNotEqual(n, v); + + break; + } + + case Condition.Gt: + { + Operand n = GetFlag(PState.NFlag); + Operand z = GetFlag(PState.ZFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.BitwiseAnd(Inverse(z), context.ICompareEqual(n, v)); + + break; + } + + case Condition.Le: + { + Operand n = GetFlag(PState.NFlag); + Operand z = GetFlag(PState.ZFlag); + Operand v = GetFlag(PState.VFlag); + + value = context.BitwiseOr(z, context.ICompareNotEqual(n, v)); + + break; + } + } + + return value; + } + + public static void EmitCall(ArmEmitterContext context, ulong immediate) + { + bool isRecursive = immediate == context.EntryAddress; + + if (isRecursive) + { + context.Branch(context.GetLabel(immediate)); + } + else + { + EmitTableBranch(context, Const(immediate), isJump: false); + } + } + + public static void EmitVirtualCall(ArmEmitterContext context, Operand target) + { + EmitTableBranch(context, target, isJump: false); + } + + public static void EmitVirtualJump(ArmEmitterContext context, Operand target, bool isReturn) + { + if (isReturn) + { + if (target.Type == 
OperandType.I32) + { + target = context.ZeroExtend32(OperandType.I64, target); + } + + context.Return(target); + } + else + { + EmitTableBranch(context, target, isJump: true); + } + } + + private static void EmitTableBranch(ArmEmitterContext context, Operand guestAddress, bool isJump) + { + context.StoreToContext(); + + if (guestAddress.Type == OperandType.I32) + { + guestAddress = context.ZeroExtend32(OperandType.I64, guestAddress); + } + + // Store the target guest address into the native context. The stubs uses this address to dispatch into the + // next translation. + Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + Operand dispAddressAddr = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())); + context.Store(dispAddressAddr, guestAddress); + + Operand hostAddress; + + // If address is mapped onto the function table, we can skip the table walk. Otherwise we fallback + // onto the dispatch stub. + if (guestAddress.Kind == OperandKind.Constant && context.FunctionTable.IsValid(guestAddress.Value)) + { + Operand hostAddressAddr = !context.HasPtc ? + Const(ref context.FunctionTable.GetValue(guestAddress.Value)) : + Const(ref context.FunctionTable.GetValue(guestAddress.Value), new Symbol(SymbolType.FunctionTable, guestAddress.Value)); + + hostAddress = context.Load(OperandType.I64, hostAddressAddr); + } + else + { + hostAddress = !context.HasPtc ? + Const((long)context.Stubs.DispatchStub) : + Const((long)context.Stubs.DispatchStub, Ptc.DispatchStubSymbol); + } + + if (isJump) + { + context.Tailcall(hostAddress, nativeContext); + } + else + { + OpCode op = context.CurrOp; + + Operand returnAddress = context.Call(hostAddress, OperandType.I64, nativeContext); + + context.LoadFromContext(); + + // Note: The return value of a translated function is always an Int64 with the address execution has + // returned to. We expect this address to be immediately after the current instruction, if it isn't we + // keep returning until we reach the dispatcher. + Operand nextAddr = Const((long)op.Address + op.OpCodeSizeInBytes); + + // Try to continue within this block. + // If the return address isn't to our next instruction, we need to return so the JIT can figure out + // what to do. 
+ Operand lblContinue = context.GetLabel(nextAddr.Value); + context.BranchIf(lblContinue, returnAddress, nextAddr, Comparison.Equal, BasicBlockFrequency.Cold); + + context.Return(returnAddress); + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitHash.cs b/src/ARMeilleure/Instructions/InstEmitHash.cs new file mode 100644 index 00000000..82b3e353 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitHash.cs @@ -0,0 +1,69 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHashHelper; +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + private const int ByteSizeLog2 = 0; + private const int HWordSizeLog2 = 1; + private const int WordSizeLog2 = 2; + private const int DWordSizeLog2 = 3; + + public static void Crc32b(ArmEmitterContext context) + { + EmitCrc32Call(context, ByteSizeLog2, false); + } + + public static void Crc32h(ArmEmitterContext context) + { + EmitCrc32Call(context, HWordSizeLog2, false); + } + + public static void Crc32w(ArmEmitterContext context) + { + EmitCrc32Call(context, WordSizeLog2, false); + } + + public static void Crc32x(ArmEmitterContext context) + { + EmitCrc32Call(context, DWordSizeLog2, false); + } + + public static void Crc32cb(ArmEmitterContext context) + { + EmitCrc32Call(context, ByteSizeLog2, true); + } + + public static void Crc32ch(ArmEmitterContext context) + { + EmitCrc32Call(context, HWordSizeLog2, true); + } + + public static void Crc32cw(ArmEmitterContext context) + { + EmitCrc32Call(context, WordSizeLog2, true); + } + + public static void Crc32cx(ArmEmitterContext context) + { + EmitCrc32Call(context, DWordSizeLog2, true); + } + + private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c) + { + OpCodeAluBinary op = (OpCodeAluBinary)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand d = EmitCrc32(context, n, m, size, c); + + SetIntOrZR(context, op.Rd, d); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitHash32.cs b/src/ARMeilleure/Instructions/InstEmitHash32.cs new file mode 100644 index 00000000..5d39f8af --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitHash32.cs @@ -0,0 +1,53 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using static ARMeilleure.Instructions.InstEmitHashHelper; +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Crc32b(ArmEmitterContext context) + { + EmitCrc32Call(context, ByteSizeLog2, false); + } + + public static void Crc32h(ArmEmitterContext context) + { + EmitCrc32Call(context, HWordSizeLog2, false); + } + + public static void Crc32w(ArmEmitterContext context) + { + EmitCrc32Call(context, WordSizeLog2, false); + } + + public static void Crc32cb(ArmEmitterContext context) + { + EmitCrc32Call(context, ByteSizeLog2, true); + } + + public static void Crc32ch(ArmEmitterContext context) + { + EmitCrc32Call(context, HWordSizeLog2, true); + } + + public static void Crc32cw(ArmEmitterContext context) + { + EmitCrc32Call(context, WordSizeLog2, true); + } + + private static void EmitCrc32Call(ArmEmitterContext context, int size, bool c) + { + IOpCode32AluReg op = (IOpCode32AluReg)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, 
op.Rm); + + Operand d = EmitCrc32(context, n, m, size, c); + + EmitAluStore(context, d); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitHashHelper.cs b/src/ARMeilleure/Instructions/InstEmitHashHelper.cs new file mode 100644 index 00000000..55a03a4f --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitHashHelper.cs @@ -0,0 +1,118 @@ +// https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf + +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitHashHelper + { + public const uint Crc32RevPoly = 0xedb88320; + public const uint Crc32cRevPoly = 0x82f63b78; + + public static Operand EmitCrc32(ArmEmitterContext context, Operand crc, Operand value, int size, bool castagnoli) + { + Debug.Assert(crc.Type.IsInteger() && value.Type.IsInteger()); + Debug.Assert(size >= 0 && size < 4); + Debug.Assert((size < 3) || (value.Type == OperandType.I64)); + + if (castagnoli && Optimizations.UseSse42) + { + // The CRC32 instruction does not have an immediate variant, so ensure both inputs are in registers. + value = (value.Kind == OperandKind.Constant) ? context.Copy(value) : value; + crc = (crc.Kind == OperandKind.Constant) ? context.Copy(crc) : crc; + + Intrinsic op = size switch + { + 0 => Intrinsic.X86Crc32_8, + 1 => Intrinsic.X86Crc32_16, + _ => Intrinsic.X86Crc32, + }; + + return (size == 3) ? context.ConvertI64ToI32(context.AddIntrinsicLong(op, crc, value)) : context.AddIntrinsicInt(op, crc, value); + } + else if (Optimizations.UsePclmulqdq) + { + return size switch + { + 3 => EmitCrc32Optimized64(context, crc, value, castagnoli), + _ => EmitCrc32Optimized(context, crc, value, castagnoli, size), + }; + } + else + { + string name = (size, castagnoli) switch + { + (0, false) => nameof(SoftFallback.Crc32b), + (1, false) => nameof(SoftFallback.Crc32h), + (2, false) => nameof(SoftFallback.Crc32w), + (3, false) => nameof(SoftFallback.Crc32x), + (0, true) => nameof(SoftFallback.Crc32cb), + (1, true) => nameof(SoftFallback.Crc32ch), + (2, true) => nameof(SoftFallback.Crc32cw), + (3, true) => nameof(SoftFallback.Crc32cx), + _ => throw new ArgumentOutOfRangeException(nameof(size)) + }; + + return context.Call(typeof(SoftFallback).GetMethod(name), crc, value); + } + } + + private static Operand EmitCrc32Optimized(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli, int size) + { + long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))' + long polynomial = castagnoli ? 
0x105EC76F0 : 0x1DB710641; // P'(x) << 1 + + crc = context.VectorInsert(context.VectorZero(), crc, 0); + + switch (size) + { + case 0: data = context.VectorInsert8(context.VectorZero(), data, 0); break; + case 1: data = context.VectorInsert16(context.VectorZero(), data, 0); break; + case 2: data = context.VectorInsert(context.VectorZero(), data, 0); break; + } + + int bitsize = 8 << size; + + Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data); + tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(64 - bitsize)); + tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(0)); + tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0)); + + if (bitsize < 32) + { + crc = context.AddIntrinsic(Intrinsic.X86Pslldq, crc, Const((64 - bitsize) / 8)); + tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, crc); + } + + return context.VectorExtract(OperandType.I32, tmp, 2); + } + + private static Operand EmitCrc32Optimized64(ArmEmitterContext context, Operand crc, Operand data, bool castagnoli) + { + long mu = castagnoli ? 0x0DEA713F1 : 0x1F7011641; // mu' = floor(x^64/P(x))' + long polynomial = castagnoli ? 0x105EC76F0 : 0x1DB710641; // P'(x) << 1 + + crc = context.VectorInsert(context.VectorZero(), crc, 0); + data = context.VectorInsert(context.VectorZero(), data, 0); + + Operand tmp = context.AddIntrinsic(Intrinsic.X86Pxor, crc, data); + Operand res = context.AddIntrinsic(Intrinsic.X86Pslldq, tmp, Const(4)); + + tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, res, X86GetScalar(context, mu), Const(0)); + tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0)); + + tmp = context.AddIntrinsic(Intrinsic.X86Pxor, tmp, res); + tmp = context.AddIntrinsic(Intrinsic.X86Psllq, tmp, Const(32)); + + tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, mu), Const(1)); + tmp = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, tmp, X86GetScalar(context, polynomial), Const(0)); + + return context.VectorExtract(OperandType.I32, tmp, 2); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitHelper.cs b/src/ARMeilleure/Instructions/InstEmitHelper.cs new file mode 100644 index 00000000..a22bb3fb --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitHelper.cs @@ -0,0 +1,264 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitHelper + { + public static Operand GetExtendedM(ArmEmitterContext context, int rm, IntType type) + { + Operand value = GetIntOrZR(context, rm); + + switch (type) + { + case IntType.UInt8: value = context.ZeroExtend8 (value.Type, value); break; + case IntType.UInt16: value = context.ZeroExtend16(value.Type, value); break; + case IntType.UInt32: value = context.ZeroExtend32(value.Type, value); break; + + case IntType.Int8: value = context.SignExtend8 (value.Type, value); break; + case IntType.Int16: value = context.SignExtend16(value.Type, value); break; + case IntType.Int32: value = context.SignExtend32(value.Type, value); break; + } + + return value; + } + + public static Operand GetIntA32(ArmEmitterContext context, int regIndex) + { + if (regIndex == RegisterAlias.Aarch32Pc) + { + OpCode32 op = (OpCode32)context.CurrOp; + + return Const((int)op.GetPc()); + } + else + { + return 
Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32); + } + } + + public static Operand GetIntA32AlignedPC(ArmEmitterContext context, int regIndex) + { + if (regIndex == RegisterAlias.Aarch32Pc) + { + OpCode32 op = (OpCode32)context.CurrOp; + + return Const((int)(op.GetPc() & 0xfffffffc)); + } + else + { + return Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32); + } + } + + public static Operand GetVecA32(int regIndex) + { + return Register(regIndex, RegisterType.Vector, OperandType.V128); + } + + public static void SetIntA32(ArmEmitterContext context, int regIndex, Operand value) + { + if (regIndex == RegisterAlias.Aarch32Pc) + { + if (!IsA32Return(context)) + { + context.StoreToContext(); + } + + EmitBxWritePc(context, value); + } + else + { + if (value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + Operand reg = Register(GetRegisterAlias(context.Mode, regIndex), RegisterType.Integer, OperandType.I32); + + context.Copy(reg, value); + } + } + + public static int GetRegisterAlias(Aarch32Mode mode, int regIndex) + { + // Only registers >= 8 are banked, + // with registers in the range [8, 12] being + // banked for the FIQ mode, and registers + // 13 and 14 being banked for all modes. + if ((uint)regIndex < 8) + { + return regIndex; + } + + return GetBankedRegisterAlias(mode, regIndex); + } + + public static int GetBankedRegisterAlias(Aarch32Mode mode, int regIndex) + { + switch (regIndex) + { + case 8: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R8Fiq + : RegisterAlias.R8Usr; + + case 9: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R9Fiq + : RegisterAlias.R9Usr; + + case 10: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R10Fiq + : RegisterAlias.R10Usr; + + case 11: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R11Fiq + : RegisterAlias.R11Usr; + + case 12: return mode == Aarch32Mode.Fiq + ? RegisterAlias.R12Fiq + : RegisterAlias.R12Usr; + + case 13: + switch (mode) + { + case Aarch32Mode.User: + case Aarch32Mode.System: return RegisterAlias.SpUsr; + case Aarch32Mode.Fiq: return RegisterAlias.SpFiq; + case Aarch32Mode.Irq: return RegisterAlias.SpIrq; + case Aarch32Mode.Supervisor: return RegisterAlias.SpSvc; + case Aarch32Mode.Abort: return RegisterAlias.SpAbt; + case Aarch32Mode.Hypervisor: return RegisterAlias.SpHyp; + case Aarch32Mode.Undefined: return RegisterAlias.SpUnd; + + default: throw new ArgumentException(nameof(mode)); + } + + case 14: + switch (mode) + { + case Aarch32Mode.User: + case Aarch32Mode.Hypervisor: + case Aarch32Mode.System: return RegisterAlias.LrUsr; + case Aarch32Mode.Fiq: return RegisterAlias.LrFiq; + case Aarch32Mode.Irq: return RegisterAlias.LrIrq; + case Aarch32Mode.Supervisor: return RegisterAlias.LrSvc; + case Aarch32Mode.Abort: return RegisterAlias.LrAbt; + case Aarch32Mode.Undefined: return RegisterAlias.LrUnd; + + default: throw new ArgumentException(nameof(mode)); + } + + default: throw new ArgumentOutOfRangeException(nameof(regIndex)); + } + } + + public static bool IsA32Return(ArmEmitterContext context) + { + switch (context.CurrOp) + { + case IOpCode32MemMult op: + return true; // Setting PC using LDM is nearly always a return. 
+ case OpCode32AluRsImm op: + return op.Rm == RegisterAlias.Aarch32Lr; + case OpCode32AluRsReg op: + return op.Rm == RegisterAlias.Aarch32Lr; + case OpCode32AluReg op: + return op.Rm == RegisterAlias.Aarch32Lr; + case OpCode32Mem op: + return op.Rn == RegisterAlias.Aarch32Sp && op.WBack && !op.Index; // Setting PC to an address stored on the stack is nearly always a return. + } + return false; + } + + public static void EmitBxWritePc(ArmEmitterContext context, Operand pc, int sourceRegister = 0) + { + bool isReturn = sourceRegister == RegisterAlias.Aarch32Lr || IsA32Return(context); + Operand mode = context.BitwiseAnd(pc, Const(1)); + + SetFlag(context, PState.TFlag, mode); + + Operand addr = context.ConditionalSelect(mode, context.BitwiseAnd(pc, Const(~1)), context.BitwiseAnd(pc, Const(~3))); + + InstEmitFlowHelper.EmitVirtualJump(context, addr, isReturn); + } + + public static Operand GetIntOrZR(ArmEmitterContext context, int regIndex) + { + if (regIndex == RegisterConsts.ZeroIndex) + { + OperandType type = context.CurrOp.GetOperandType(); + + return type == OperandType.I32 ? Const(0) : Const(0L); + } + else + { + return GetIntOrSP(context, regIndex); + } + } + + public static void SetIntOrZR(ArmEmitterContext context, int regIndex, Operand value) + { + if (regIndex == RegisterConsts.ZeroIndex) + { + return; + } + + SetIntOrSP(context, regIndex, value); + } + + public static Operand GetIntOrSP(ArmEmitterContext context, int regIndex) + { + Operand value = Register(regIndex, RegisterType.Integer, OperandType.I64); + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + value = context.ConvertI64ToI32(value); + } + + return value; + } + + public static void SetIntOrSP(ArmEmitterContext context, int regIndex, Operand value) + { + Operand reg = Register(regIndex, RegisterType.Integer, OperandType.I64); + + if (value.Type == OperandType.I32) + { + value = context.ZeroExtend32(OperandType.I64, value); + } + + context.Copy(reg, value); + } + + public static Operand GetVec(int regIndex) + { + return Register(regIndex, RegisterType.Vector, OperandType.V128); + } + + public static Operand GetFlag(PState stateFlag) + { + return Register((int)stateFlag, RegisterType.Flag, OperandType.I32); + } + + public static Operand GetFpFlag(FPState stateFlag) + { + return Register((int)stateFlag, RegisterType.FpFlag, OperandType.I32); + } + + public static void SetFlag(ArmEmitterContext context, PState stateFlag, Operand value) + { + context.Copy(GetFlag(stateFlag), value); + + context.MarkFlagSet(stateFlag); + } + + public static void SetFpFlag(ArmEmitterContext context, FPState stateFlag, Operand value) + { + context.Copy(GetFpFlag(stateFlag), value); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitMemory.cs b/src/ARMeilleure/Instructions/InstEmitMemory.cs new file mode 100644 index 00000000..7baed14c --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMemory.cs @@ -0,0 +1,184 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Adr(ArmEmitterContext context) + { + OpCodeAdr op = (OpCodeAdr)context.CurrOp; + + SetIntOrZR(context, op.Rd, Const(op.Address + (ulong)op.Immediate)); + } + + public static void Adrp(ArmEmitterContext context) + { + 
OpCodeAdr op = (OpCodeAdr)context.CurrOp; + + ulong address = (op.Address & ~0xfffUL) + ((ulong)op.Immediate << 12); + + SetIntOrZR(context, op.Rd, Const(address)); + } + + public static void Ldr(ArmEmitterContext context) => EmitLdr(context, signed: false); + public static void Ldrs(ArmEmitterContext context) => EmitLdr(context, signed: true); + + private static void EmitLdr(ArmEmitterContext context, bool signed) + { + OpCodeMem op = (OpCodeMem)context.CurrOp; + + Operand address = GetAddress(context); + + if (signed && op.Extend64) + { + EmitLoadSx64(context, address, op.Rt, op.Size); + } + else if (signed) + { + EmitLoadSx32(context, address, op.Rt, op.Size); + } + else + { + EmitLoadZx(context, address, op.Rt, op.Size); + } + + EmitWBackIfNeeded(context, address); + } + + public static void Ldr_Literal(ArmEmitterContext context) + { + IOpCodeLit op = (IOpCodeLit)context.CurrOp; + + if (op.Prefetch) + { + return; + } + + if (op.Signed) + { + EmitLoadSx64(context, Const(op.Immediate), op.Rt, op.Size); + } + else + { + EmitLoadZx(context, Const(op.Immediate), op.Rt, op.Size); + } + } + + public static void Ldp(ArmEmitterContext context) + { + OpCodeMemPair op = (OpCodeMemPair)context.CurrOp; + + void EmitLoad(int rt, Operand ldAddr) + { + if (op.Extend64) + { + EmitLoadSx64(context, ldAddr, rt, op.Size); + } + else + { + EmitLoadZx(context, ldAddr, rt, op.Size); + } + } + + Operand address = GetAddress(context); + Operand address2 = GetAddress(context, 1L << op.Size); + + EmitLoad(op.Rt, address); + EmitLoad(op.Rt2, address2); + + EmitWBackIfNeeded(context, address); + } + + public static void Str(ArmEmitterContext context) + { + OpCodeMem op = (OpCodeMem)context.CurrOp; + + Operand address = GetAddress(context); + + EmitStore(context, address, op.Rt, op.Size); + + EmitWBackIfNeeded(context, address); + } + + public static void Stp(ArmEmitterContext context) + { + OpCodeMemPair op = (OpCodeMemPair)context.CurrOp; + + Operand address = GetAddress(context); + Operand address2 = GetAddress(context, 1L << op.Size); + + EmitStore(context, address, op.Rt, op.Size); + EmitStore(context, address2, op.Rt2, op.Size); + + EmitWBackIfNeeded(context, address); + } + + private static Operand GetAddress(ArmEmitterContext context, long addend = 0) + { + Operand address = default; + + switch (context.CurrOp) + { + case OpCodeMemImm op: + { + address = context.Copy(GetIntOrSP(context, op.Rn)); + + // Pre-indexing. + if (!op.PostIdx) + { + address = context.Add(address, Const(op.Immediate + addend)); + } + else if (addend != 0) + { + address = context.Add(address, Const(addend)); + } + + break; + } + + case OpCodeMemReg op: + { + Operand n = GetIntOrSP(context, op.Rn); + + Operand m = GetExtendedM(context, op.Rm, op.IntType); + + if (op.Shift) + { + m = context.ShiftLeft(m, Const(op.Size)); + } + + address = context.Add(n, m); + + if (addend != 0) + { + address = context.Add(address, Const(addend)); + } + + break; + } + } + + return address; + } + + private static void EmitWBackIfNeeded(ArmEmitterContext context, Operand address) + { + // Check whenever the current OpCode has post-indexed write back, if so write it. + if (context.CurrOp is OpCodeMemImm op && op.WBack) + { + if (op.PostIdx) + { + address = context.Add(address, Const(op.Immediate)); + } + + SetIntOrSP(context, op.Rn, address); + } + } + } +}
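Adrp above is pure page arithmetic: the instruction's own address is truncated to a 4 KiB page boundary and the immediate selects a signed page offset. A standalone sketch of that computation (a hypothetical helper, not part of the translator):

static class AdrpSketch
{
    // ADRP: clear the low 12 bits of the instruction address, then add the
    // immediate scaled by the 4 KiB page size, as in the emitter above.
    public static ulong Target(ulong instructionAddress, long immediate) =>
        (instructionAddress & ~0xfffUL) + ((ulong)immediate << 12);
}

So an ADRP at 0x1234 with immediate 1 produces 0x2000, regardless of where within the page the instruction sits.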
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitMemory32.cs b/src/ARMeilleure/Instructions/InstEmitMemory32.cs new file mode 100644 index 00000000..17ec97aa --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMemory32.cs @@ -0,0 +1,265 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + private const int ByteSizeLog2 = 0; + private const int HWordSizeLog2 = 1; + private const int WordSizeLog2 = 2; + private const int DWordSizeLog2 = 3; + + [Flags] + enum AccessType + { + Store = 0, + Signed = 1, + Load = 2, + Ordered = 4, + Exclusive = 8, + + LoadZx = Load, + LoadSx = Load | Signed, + } + + public static void Ldm(ArmEmitterContext context) + { + IOpCode32MemMult op = (IOpCode32MemMult)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writesToPc = (op.RegisterMask & (1 << RegisterAlias.Aarch32Pc)) != 0; + + bool writeBack = op.PostOffset != 0 && (op.Rn != RegisterAlias.Aarch32Pc || !writesToPc); + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int mask = op.RegisterMask; + int offset = 0; + + for (int register = 0; mask != 0; mask >>= 1, register++) + { + if ((mask & 1) != 0) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitLoadZx(context, address, register, WordSizeLog2); + + offset += 4; + } + } + } + + public static void Ldr(ArmEmitterContext context) + { + EmitLoadOrStore(context, WordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx); + } + + public static void Ldrd(ArmEmitterContext context) + { + EmitLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx); + } + + public static void Ldrsb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.LoadSx); + } + + public static void Ldrsh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.LoadSx); + } + + public static void Stm(ArmEmitterContext context) + { + IOpCode32MemMult op = (IOpCode32MemMult)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + int mask = op.RegisterMask; + int offset = 0; + + for (int register = 0; mask != 0; mask >>= 1, register++) + { + if ((mask & 1) != 0) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitStore(context, address, register, WordSizeLog2); + + // Note: If Rn is also specified on the register list, + // and Rn is the first register on this list, then the + // value that is written to memory is the unmodified value, + // before the write back. If it is on the list, but it's + // not the first one, then the value written to memory + // varies between CPUs. + if (offset == 0 && op.PostOffset != 0) + { + // Emit write back after the first write. 
+ SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + offset += 4; + } + } + } + + public static void Str(ArmEmitterContext context) + { + EmitLoadOrStore(context, WordSizeLog2, AccessType.Store); + } + + public static void Strb(ArmEmitterContext context) + { + EmitLoadOrStore(context, ByteSizeLog2, AccessType.Store); + } + + public static void Strd(ArmEmitterContext context) + { + EmitLoadOrStore(context, DWordSizeLog2, AccessType.Store); + } + + public static void Strh(ArmEmitterContext context) + { + EmitLoadOrStore(context, HWordSizeLog2, AccessType.Store); + } + + private static void EmitLoadOrStore(ArmEmitterContext context, int size, AccessType accType) + { + IOpCode32Mem op = (IOpCode32Mem)context.CurrOp; + + Operand n = context.Copy(GetIntA32AlignedPC(context, op.Rn)); + Operand m = GetMemM(context, setCarry: false); + + Operand temp = default; + + if (op.Index || op.WBack) + { + temp = op.Add + ? context.Add (n, m) + : context.Subtract(n, m); + } + + if (op.WBack) + { + SetIntA32(context, op.Rn, temp); + } + + Operand address; + + if (op.Index) + { + address = temp; + } + else + { + address = n; + } + + if ((accType & AccessType.Load) != 0) + { + void Load(int rt, int offs, int loadSize) + { + Operand addr = context.Add(address, Const(offs)); + + if ((accType & AccessType.Signed) != 0) + { + EmitLoadSx32(context, addr, rt, loadSize); + } + else + { + EmitLoadZx(context, addr, rt, loadSize); + } + } + + if (size == DWordSizeLog2) + { + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Load(op.Rt, 0, WordSizeLog2); + Load(op.Rt2, 4, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Load(op.Rt2, 0, WordSizeLog2); + Load(op.Rt, 4, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + else + { + Load(op.Rt, 0, size); + } + } + else + { + void Store(int rt, int offs, int storeSize) + { + Operand addr = context.Add(address, Const(offs)); + + EmitStore(context, addr, rt, storeSize); + } + + if (size == DWordSizeLog2) + { + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Store(op.Rt, 0, WordSizeLog2); + Store(op.Rt2, 4, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Store(op.Rt2, 0, WordSizeLog2); + Store(op.Rt, 4, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + else + { + Store(op.Rt, 0, size); + } + } + } + + public static void Adr(ArmEmitterContext context) + { + IOpCode32Adr op = (IOpCode32Adr)context.CurrOp; + SetIntA32(context, op.Rd, Const(op.Immediate)); + } + } +}
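Ldm and Stm above walk a 16-bit register mask, assigning each set bit one ascending word of the transfer, lowest-numbered register at the lowest address. A self-contained sketch of that walk (illustrative C#, not the emitter's IR):

using System.Collections.Generic;

static class RegisterListSketch
{
    // Yields (register, byte offset) pairs in the order the Ldm/Stm loops
    // above emit their word accesses.
    public static IEnumerable<(int Register, int Offset)> Walk(int mask)
    {
        int offset = 0;
        for (int register = 0; mask != 0; mask >>= 1, register++)
        {
            if ((mask & 1) != 0)
            {
                yield return (register, offset);
                offset += 4;
            }
        }
    }
}

A mask of 0b1010_0101, for instance, yields r0 at +0, r2 at +4, r5 at +8 and r7 at +12.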
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs b/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs new file mode 100644 index 00000000..c7ed01e3 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMemoryEx.cs @@ -0,0 +1,178 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryExHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + [Flags] + private enum AccessType + { + None = 0, + Ordered = 1, + Exclusive = 2, + OrderedEx = Ordered | Exclusive + } + + public static void Clrex(ArmEmitterContext context) + { + EmitClearExclusive(context); + } + + public static void Csdb(ArmEmitterContext context) + { + // Execute as no-op. + } + + public static void Dmb(ArmEmitterContext context) => EmitBarrier(context); + public static void Dsb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Ldar(ArmEmitterContext context) => EmitLdr(context, AccessType.Ordered); + public static void Ldaxr(ArmEmitterContext context) => EmitLdr(context, AccessType.OrderedEx); + public static void Ldxr(ArmEmitterContext context) => EmitLdr(context, AccessType.Exclusive); + public static void Ldxp(ArmEmitterContext context) => EmitLdp(context, AccessType.Exclusive); + public static void Ldaxp(ArmEmitterContext context) => EmitLdp(context, AccessType.OrderedEx); + + private static void EmitLdr(ArmEmitterContext context, AccessType accType) + { + EmitLoadEx(context, accType, pair: false); + } + + private static void EmitLdp(ArmEmitterContext context, AccessType accType) + { + EmitLoadEx(context, accType, pair: true); + } + + private static void EmitLoadEx(ArmEmitterContext context, AccessType accType, bool pair) + { + OpCodeMemEx op = (OpCodeMemEx)context.CurrOp; + + bool ordered = (accType & AccessType.Ordered) != 0; + bool exclusive = (accType & AccessType.Exclusive) != 0; + + if (ordered) + { + EmitBarrier(context); + } + + Operand address = context.Copy(GetIntOrSP(context, op.Rn)); + + if (pair) + { + // Exclusive loads should be atomic. For pairwise loads, we need to + // read all the data at once. For a 32-bits pairwise load, we do a + // simple 64-bits load, for a 128-bits load, we need to call a special + // method to read 128-bits atomically. + if (op.Size == 2) + { + Operand value = EmitLoadExclusive(context, address, exclusive, 3); + + Operand valueLow = context.ConvertI64ToI32(value); + + valueLow = context.ZeroExtend32(OperandType.I64, valueLow); + + Operand valueHigh = context.ShiftRightUI(value, Const(32)); + + SetIntOrZR(context, op.Rt, valueLow); + SetIntOrZR(context, op.Rt2, valueHigh); + } + else if (op.Size == 3) + { + Operand value = EmitLoadExclusive(context, address, exclusive, 4); + + Operand valueLow = context.VectorExtract(OperandType.I64, value, 0); + Operand valueHigh = context.VectorExtract(OperandType.I64, value, 1); + + SetIntOrZR(context, op.Rt, valueLow); + SetIntOrZR(context, op.Rt2, valueHigh); + } + else + { + throw new InvalidOperationException($"Invalid load size of {1 << op.Size} bytes."); + } + } + else + { + // 8, 16, 32 or 64-bits (non-pairwise) load. 
+ Operand value = EmitLoadExclusive(context, address, exclusive, op.Size); + + SetIntOrZR(context, op.Rt, value); + } + } + + public static void Prfm(ArmEmitterContext context) + { + // Memory Prefetch, execute as no-op. + } + + public static void Stlr(ArmEmitterContext context) => EmitStr(context, AccessType.Ordered); + public static void Stlxr(ArmEmitterContext context) => EmitStr(context, AccessType.OrderedEx); + public static void Stxr(ArmEmitterContext context) => EmitStr(context, AccessType.Exclusive); + public static void Stxp(ArmEmitterContext context) => EmitStp(context, AccessType.Exclusive); + public static void Stlxp(ArmEmitterContext context) => EmitStp(context, AccessType.OrderedEx); + + private static void EmitStr(ArmEmitterContext context, AccessType accType) + { + EmitStoreEx(context, accType, pair: false); + } + + private static void EmitStp(ArmEmitterContext context, AccessType accType) + { + EmitStoreEx(context, accType, pair: true); + } + + private static void EmitStoreEx(ArmEmitterContext context, AccessType accType, bool pair) + { + OpCodeMemEx op = (OpCodeMemEx)context.CurrOp; + + bool ordered = (accType & AccessType.Ordered) != 0; + bool exclusive = (accType & AccessType.Exclusive) != 0; + + Operand address = context.Copy(GetIntOrSP(context, op.Rn)); + + Operand t = GetIntOrZR(context, op.Rt); + + if (pair) + { + Debug.Assert(op.Size == 2 || op.Size == 3, "Invalid size for pairwise store."); + + Operand t2 = GetIntOrZR(context, op.Rt2); + + Operand value; + + if (op.Size == 2) + { + value = context.BitwiseOr(t, context.ShiftLeft(t2, Const(32))); + } + else /* if (op.Size == 3) */ + { + value = context.VectorInsert(context.VectorZero(), t, 0); + value = context.VectorInsert(value, t2, 1); + } + + EmitStoreExclusive(context, address, value, exclusive, op.Size + 1, op.Rs, a32: false); + } + else + { + EmitStoreExclusive(context, address, t, exclusive, op.Size, op.Rs, a32: false); + } + + if (ordered) + { + EmitBarrier(context); + } + } + + private static void EmitBarrier(ArmEmitterContext context) + { + context.MemoryBarrier(); + } + } +}
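The exclusive-access emitters above implement Arm's local monitor in software: the exclusive load records a granule-masked address plus the loaded value in the native context, and the exclusive store succeeds only if the address still matches and a compare-and-swap finds memory unchanged. A toy single-granule model of that protocol (assumptions throughout; this is not the JIT's emitted code, which operates on the guest address space):

using System.Threading;

class ExclusiveMonitorSketch
{
    private const long ErgMask = ~(64L - 1); // 64-byte exclusive reservation granule
    private long _exAddress = -1;
    private long _exValue;

    public long LoadExclusive(long[] memory, int index)
    {
        _exAddress = ((long)index * sizeof(long)) & ErgMask;
        return _exValue = Volatile.Read(ref memory[index]);
    }

    // Returns true when the store succeeded (the inverse of the STXR status flag).
    public bool StoreExclusive(long[] memory, int index, long value)
    {
        if (_exAddress != (((long)index * sizeof(long)) & ErgMask))
        {
            return false; // Monitor lost: a different address, or none, is reserved.
        }
        // The CAS against the remembered value stands in for the emitted
        // CompareAndSwap: it fails if memory changed since the exclusive load.
        return Interlocked.CompareExchange(ref memory[index], value, _exValue) == _exValue;
    }

    public void ClearExclusive() => _exAddress = -1; // Unaligned, so no address can match.
}

Note that a comparison-based check of this kind admits an ABA case: if memory is modified and then restored to the remembered value, the store still succeeds.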
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs b/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs new file mode 100644 index 00000000..c0b6fc39 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMemoryEx32.cs @@ -0,0 +1,237 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryExHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Clrex(ArmEmitterContext context) + { + EmitClearExclusive(context); + } + + public static void Csdb(ArmEmitterContext context) + { + // Execute as no-op. + } + + public static void Dmb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Dsb(ArmEmitterContext context) => EmitBarrier(context); + + public static void Ldrex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Ldrexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive); + } + + public static void Lda(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + public static void Ldab(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + public static void Ldaex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldaexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Ldah(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.LoadZx | AccessType.Ordered); + } + + // Stores. 
+ + public static void Strex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Strexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive); + } + + public static void Stl(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Ordered); + } + + public static void Stlb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Ordered); + } + + public static void Stlex(ArmEmitterContext context) + { + EmitExLoadOrStore(context, WordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexb(ArmEmitterContext context) + { + EmitExLoadOrStore(context, ByteSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexd(ArmEmitterContext context) + { + EmitExLoadOrStore(context, DWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlexh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Exclusive | AccessType.Ordered); + } + + public static void Stlh(ArmEmitterContext context) + { + EmitExLoadOrStore(context, HWordSizeLog2, AccessType.Store | AccessType.Ordered); + } + + private static void EmitExLoadOrStore(ArmEmitterContext context, int size, AccessType accType) + { + IOpCode32MemEx op = (IOpCode32MemEx)context.CurrOp; + + Operand address = context.Copy(GetIntA32(context, op.Rn)); + + var exclusive = (accType & AccessType.Exclusive) != 0; + var ordered = (accType & AccessType.Ordered) != 0; + + if ((accType & AccessType.Load) != 0) + { + if (ordered) + { + EmitBarrier(context); + } + + if (size == DWordSizeLog2) + { + // Keep loads atomic - make the call to get the whole region and then decompose it into parts + // for the registers. 
+ + Operand value = EmitLoadExclusive(context, address, exclusive, size); + + Operand valueLow = context.ConvertI64ToI32(value); + + valueLow = context.ZeroExtend32(OperandType.I64, valueLow); + + Operand valueHigh = context.ShiftRightUI(value, Const(32)); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + SetIntA32(context, op.Rt, valueLow); + SetIntA32(context, op.Rt2, valueHigh); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + SetIntA32(context, op.Rt2, valueLow); + SetIntA32(context, op.Rt, valueHigh); + + context.MarkLabel(lblEnd); + } + else + { + SetIntA32(context, op.Rt, EmitLoadExclusive(context, address, exclusive, size)); + } + } + else + { + if (size == DWordSizeLog2) + { + // Split the result into 2 words (based on endianness) + + Operand lo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt)); + Operand hi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt2)); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + Operand leResult = context.BitwiseOr(lo, context.ShiftLeft(hi, Const(32))); + EmitStoreExclusive(context, address, leResult, exclusive, size, op.Rd, a32: true); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + Operand beResult = context.BitwiseOr(hi, context.ShiftLeft(lo, Const(32))); + EmitStoreExclusive(context, address, beResult, exclusive, size, op.Rd, a32: true); + + context.MarkLabel(lblEnd); + } + else + { + Operand value = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rt)); + EmitStoreExclusive(context, address, value, exclusive, size, op.Rd, a32: true); + } + + if (ordered) + { + EmitBarrier(context); + } + } + } + + private static void EmitBarrier(ArmEmitterContext context) + { + // Note: This barrier is most likely not necessary, and probably + // doesn't make any difference since we need to do a ton of stuff + // (software MMU emulation) to read or write anything anyway. + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs b/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs new file mode 100644 index 00000000..9a69442a --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMemoryExHelper.cs @@ -0,0 +1,174 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitMemoryExHelper + { + private const int ErgSizeLog2 = 4; + + public static Operand EmitLoadExclusive(ArmEmitterContext context, Operand address, bool exclusive, int size) + { + if (exclusive) + { + Operand value; + + if (size == 4) + { + // Only 128-bit CAS is guaranteed to have a atomic load. 
+ Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, default, write: false, 4); + + Operand zero = context.VectorZero(); + + value = context.CompareAndSwap(physAddr, zero, zero); + } + else + { + value = InstEmitMemoryHelper.EmitReadIntAligned(context, address, size); + } + + Operand arg0 = context.LoadArgument(OperandType.I64, 0); + + Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset())); + Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset())); + + context.Store(exAddrPtr, context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask()))); + + // Make sure the unused higher bits of the value are cleared. + if (size < 3) + { + context.Store(exValuePtr, Const(0UL)); + } + if (size < 4) + { + context.Store(context.Add(exValuePtr, Const(exValuePtr.Type, 8L)), Const(0UL)); + } + + // Store the new exclusive value. + context.Store(exValuePtr, value); + + return value; + } + else + { + return InstEmitMemoryHelper.EmitReadIntAligned(context, address, size); + } + } + + public static void EmitStoreExclusive( + ArmEmitterContext context, + Operand address, + Operand value, + bool exclusive, + int size, + int rs, + bool a32) + { + if (size < 3) + { + value = context.ConvertI64ToI32(value); + } + + if (exclusive) + { + // We overwrite one of the register (Rs), + // keep a copy of the values to ensure we are working with the correct values. + address = context.Copy(address); + value = context.Copy(value); + + void SetRs(Operand value) + { + if (a32) + { + SetIntA32(context, rs, value); + } + else + { + SetIntOrZR(context, rs, value); + } + } + + Operand arg0 = context.LoadArgument(OperandType.I64, 0); + + Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset())); + Operand exAddr = context.Load(address.Type, exAddrPtr); + + // STEP 1: Check if we have exclusive access to this memory region. If not, fail and skip store. + Operand maskedAddress = context.BitwiseAnd(address, Const(address.Type, GetExclusiveAddressMask())); + + Operand exFailed = context.ICompareNotEqual(exAddr, maskedAddress); + + Operand lblExit = Label(); + + SetRs(Const(1)); + + context.BranchIfTrue(lblExit, exFailed); + + // STEP 2: We have exclusive access and the address is valid, attempt the store using CAS. + Operand physAddr = InstEmitMemoryHelper.EmitPtPointerLoad(context, address, default, write: true, size); + + Operand exValuePtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveValueOffset())); + Operand exValue = size switch + { + 0 => context.Load8(exValuePtr), + 1 => context.Load16(exValuePtr), + 2 => context.Load(OperandType.I32, exValuePtr), + 3 => context.Load(OperandType.I64, exValuePtr), + _ => context.Load(OperandType.V128, exValuePtr) + }; + + Operand currValue = size switch + { + 0 => context.CompareAndSwap8(physAddr, exValue, value), + 1 => context.CompareAndSwap16(physAddr, exValue, value), + _ => context.CompareAndSwap(physAddr, exValue, value) + }; + + // STEP 3: Check if we succeeded by comparing expected and in-memory values. 
+ Operand storeFailed; + + if (size == 4) + { + Operand currValueLow = context.VectorExtract(OperandType.I64, currValue, 0); + Operand currValueHigh = context.VectorExtract(OperandType.I64, currValue, 1); + + Operand exValueLow = context.VectorExtract(OperandType.I64, exValue, 0); + Operand exValueHigh = context.VectorExtract(OperandType.I64, exValue, 1); + + storeFailed = context.BitwiseOr( + context.ICompareNotEqual(currValueLow, exValueLow), + context.ICompareNotEqual(currValueHigh, exValueHigh)); + } + else + { + storeFailed = context.ICompareNotEqual(currValue, exValue); + } + + SetRs(storeFailed); + + context.MarkLabel(lblExit); + } + else + { + InstEmitMemoryHelper.EmitWriteIntAligned(context, address, value, size); + } + } + + public static void EmitClearExclusive(ArmEmitterContext context) + { + Operand arg0 = context.LoadArgument(OperandType.I64, 0); + + Operand exAddrPtr = context.Add(arg0, Const((long)NativeContext.GetExclusiveAddressOffset())); + + // We store ULONG max to force any exclusive address checks to fail, + // since this value is not aligned to the ERG mask. + context.Store(exAddrPtr, Const(ulong.MaxValue)); + } + + private static long GetExclusiveAddressMask() => ~((4L << ErgSizeLog2) - 1); + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs b/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs new file mode 100644 index 00000000..f97e395c --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMemoryHelper.cs @@ -0,0 +1,648 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Memory; +using ARMeilleure.Translation; +using ARMeilleure.Translation.PTC; +using System; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitMemoryHelper + { + private const int PageBits = 12; + private const int PageMask = (1 << PageBits) - 1; + + private enum Extension + { + Zx, + Sx32, + Sx64 + } + + public static void EmitLoadZx(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Zx, rt, size); + } + + public static void EmitLoadSx32(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Sx32, rt, size); + } + + public static void EmitLoadSx64(ArmEmitterContext context, Operand address, int rt, int size) + { + EmitLoad(context, address, Extension.Sx64, rt, size); + } + + private static void EmitLoad(ArmEmitterContext context, Operand address, Extension ext, int rt, int size) + { + bool isSimd = IsSimd(context); + + if ((uint)size > (isSimd ? 4 : 3)) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if (isSimd) + { + EmitReadVector(context, address, context.VectorZero(), rt, 0, size); + } + else + { + EmitReadInt(context, address, rt, size); + } + + if (!isSimd && !(context.CurrOp is OpCode32 && rt == State.RegisterAlias.Aarch32Pc)) + { + Operand value = GetInt(context, rt); + + if (ext == Extension.Sx32 || ext == Extension.Sx64) + { + OperandType destType = ext == Extension.Sx64 ? 
OperandType.I64 : OperandType.I32; + + switch (size) + { + case 0: value = context.SignExtend8 (destType, value); break; + case 1: value = context.SignExtend16(destType, value); break; + case 2: value = context.SignExtend32(destType, value); break; + } + } + + SetInt(context, rt, value); + } + } + + public static void EmitLoadSimd( + ArmEmitterContext context, + Operand address, + Operand vector, + int rt, + int elem, + int size) + { + EmitReadVector(context, address, vector, rt, elem, size); + } + + public static void EmitStore(ArmEmitterContext context, Operand address, int rt, int size) + { + bool isSimd = IsSimd(context); + + if ((uint)size > (isSimd ? 4 : 3)) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if (isSimd) + { + EmitWriteVector(context, address, rt, 0, size); + } + else + { + EmitWriteInt(context, address, rt, size); + } + } + + public static void EmitStoreSimd( + ArmEmitterContext context, + Operand address, + int rt, + int elem, + int size) + { + EmitWriteVector(context, address, rt, elem, size); + } + + private static bool IsSimd(ArmEmitterContext context) + { + return context.CurrOp is IOpCodeSimd && + !(context.CurrOp is OpCodeSimdMemMs || + context.CurrOp is OpCodeSimdMemSs); + } + + public static Operand EmitReadInt(ArmEmitterContext context, Operand address, int size) + { + Operand temp = context.AllocateLocal(size == 3 ? OperandType.I64 : OperandType.I32); + + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size); + + Operand value = default; + + switch (size) + { + case 0: value = context.Load8 (physAddr); break; + case 1: value = context.Load16(physAddr); break; + case 2: value = context.Load (OperandType.I32, physAddr); break; + case 3: value = context.Load (OperandType.I64, physAddr); break; + } + + context.Copy(temp, value); + + if (!context.Memory.Type.IsHostMapped()) + { + context.Branch(lblEnd); + + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); + + context.Copy(temp, EmitReadIntFallback(context, address, size)); + + context.MarkLabel(lblEnd); + } + + return temp; + } + + private static void EmitReadInt(ArmEmitterContext context, Operand address, int rt, int size) + { + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size); + + Operand value = default; + + switch (size) + { + case 0: value = context.Load8 (physAddr); break; + case 1: value = context.Load16(physAddr); break; + case 2: value = context.Load (OperandType.I32, physAddr); break; + case 3: value = context.Load (OperandType.I64, physAddr); break; + } + + SetInt(context, rt, value); + + if (!context.Memory.Type.IsHostMapped()) + { + context.Branch(lblEnd); + + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); + + EmitReadIntFallback(context, address, rt, size); + + context.MarkLabel(lblEnd); + } + } + + public static Operand EmitReadIntAligned(ArmEmitterContext context, Operand address, int size) + { + if ((uint)size > 4) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + Operand physAddr = EmitPtPointerLoad(context, address, default, write: false, size); + + return size switch + { + 0 => context.Load8(physAddr), + 1 => context.Load16(physAddr), + 2 => context.Load(OperandType.I32, physAddr), + 3 => context.Load(OperandType.I64, physAddr), + _ => context.Load(OperandType.V128, physAddr) + }; + } + + private static void EmitReadVector( + ArmEmitterContext 
context, + Operand address, + Operand vector, + int rt, + int elem, + int size) + { + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: false, size); + + Operand value = default; + + switch (size) + { + case 0: value = context.VectorInsert8 (vector, context.Load8(physAddr), elem); break; + case 1: value = context.VectorInsert16(vector, context.Load16(physAddr), elem); break; + case 2: value = context.VectorInsert (vector, context.Load(OperandType.I32, physAddr), elem); break; + case 3: value = context.VectorInsert (vector, context.Load(OperandType.I64, physAddr), elem); break; + case 4: value = context.Load (OperandType.V128, physAddr); break; + } + + context.Copy(GetVec(rt), value); + + if (!context.Memory.Type.IsHostMapped()) + { + context.Branch(lblEnd); + + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); + + EmitReadVectorFallback(context, address, vector, rt, elem, size); + + context.MarkLabel(lblEnd); + } + } + + private static Operand VectorCreate(ArmEmitterContext context, Operand value) + { + return context.VectorInsert(context.VectorZero(), value, 0); + } + + private static void EmitWriteInt(ArmEmitterContext context, Operand address, int rt, int size) + { + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size); + + Operand value = GetInt(context, rt); + + if (size < 3 && value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + + switch (size) + { + case 0: context.Store8 (physAddr, value); break; + case 1: context.Store16(physAddr, value); break; + case 2: context.Store (physAddr, value); break; + case 3: context.Store (physAddr, value); break; + } + + if (!context.Memory.Type.IsHostMapped()) + { + context.Branch(lblEnd); + + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); + + EmitWriteIntFallback(context, address, rt, size); + + context.MarkLabel(lblEnd); + } + } + + public static void EmitWriteIntAligned(ArmEmitterContext context, Operand address, Operand value, int size) + { + if ((uint)size > 4) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + Operand physAddr = EmitPtPointerLoad(context, address, default, write: true, size); + + if (size < 3 && value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + + if (size == 0) + { + context.Store8(physAddr, value); + } + else if (size == 1) + { + context.Store16(physAddr, value); + } + else + { + context.Store(physAddr, value); + } + } + + private static void EmitWriteVector( + ArmEmitterContext context, + Operand address, + int rt, + int elem, + int size) + { + Operand lblSlowPath = Label(); + Operand lblEnd = Label(); + + Operand physAddr = EmitPtPointerLoad(context, address, lblSlowPath, write: true, size); + + Operand value = GetVec(rt); + + switch (size) + { + case 0: context.Store8 (physAddr, context.VectorExtract8(value, elem)); break; + case 1: context.Store16(physAddr, context.VectorExtract16(value, elem)); break; + case 2: context.Store (physAddr, context.VectorExtract(OperandType.I32, value, elem)); break; + case 3: context.Store (physAddr, context.VectorExtract(OperandType.I64, value, elem)); break; + case 4: context.Store (physAddr, value); break; + } + + if (!context.Memory.Type.IsHostMapped()) + { + context.Branch(lblEnd); + + context.MarkLabel(lblSlowPath, BasicBlockFrequency.Cold); + + EmitWriteVectorFallback(context, address, rt, elem, size); + + 
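+ // The fast path branches over the fallback to lblEnd below; the cold slow path falls through into it.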
context.MarkLabel(lblEnd); + } + } + + public static Operand EmitPtPointerLoad(ArmEmitterContext context, Operand address, Operand lblSlowPath, bool write, int size) + { + if (context.Memory.Type.IsHostMapped()) + { + return EmitHostMappedPointer(context, address); + } + + int ptLevelBits = context.Memory.AddressSpaceBits - PageBits; + int ptLevelSize = 1 << ptLevelBits; + int ptLevelMask = ptLevelSize - 1; + + // Rotating the address right by the access size moves the low alignment bits to the top, + // so the single signed range check below rejects both out-of-range and misaligned addresses. + Operand addrRotated = size != 0 ? context.RotateRight(address, Const(size)) : address; + Operand addrShifted = context.ShiftRightUI(addrRotated, Const(PageBits - size)); + + Operand pte = !context.HasPtc + ? Const(context.Memory.PageTablePointer.ToInt64()) + : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol); + + Operand pteOffset = context.BitwiseAnd(addrShifted, Const(addrShifted.Type, ptLevelMask)); + + if (pteOffset.Type == OperandType.I32) + { + pteOffset = context.ZeroExtend32(OperandType.I64, pteOffset); + } + + pte = context.Load(OperandType.I64, context.Add(pte, context.ShiftLeft(pteOffset, Const(3)))); + + if (addrShifted.Type == OperandType.I32) + { + addrShifted = context.ZeroExtend32(OperandType.I64, addrShifted); + } + + // If the VA is out of range, or not aligned to the access size, force the PTE to 0 by masking it. + pte = context.BitwiseAnd(pte, context.ShiftRightSI(context.Add(addrShifted, Const(-(long)ptLevelSize)), Const(63))); + + if (lblSlowPath != default) + { + if (write) + { + context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual); + pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits (they are still used by the C# memory access code). + } + else + { + // For reads, shift left by one so the next protection bit is tested by the same signed compare, then undo the shift. + pte = context.ShiftLeft(pte, Const(1)); + context.BranchIf(lblSlowPath, pte, Const(0L), Comparison.LessOrEqual); + pte = context.ShiftRightUI(pte, Const(1)); + } + } + else + { + // When no label is provided to jump to a slow path if the address is invalid, + // we do the validation ourselves, and throw if needed. + + Operand lblNotWatched = Label(); + + // Is the page currently being tracked for read/write? If so, we need to call SignalMemoryTracking. + context.BranchIf(lblNotWatched, pte, Const(0L), Comparison.GreaterOrEqual, BasicBlockFrequency.Cold); + + // Signal memory tracking. The size doesn't matter here, as the address is assumed to be size-aligned. + context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SignalMemoryTracking)), address, Const(1UL), Const(write ? 1 : 0)); + context.MarkLabel(lblNotWatched); + + pte = context.BitwiseAnd(pte, Const(0xffffffffffffUL)); // Ignore any software protection bits (they are still used by the C# memory access code). + + Operand lblNonNull = Label(); + + // Skip the exception if the PTE address is non-null (not zero). + context.BranchIfTrue(lblNonNull, pte, BasicBlockFrequency.Cold); + + // The call is not expected to return (it should throw).
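+ // This point is only reached when the masked PTE is zero, i.e. the guest address is unmapped or failed the range/alignment check above.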
+ context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess)), address); + context.MarkLabel(lblNonNull); + } + + Operand pageOffset = context.BitwiseAnd(address, Const(address.Type, PageMask)); + + if (pageOffset.Type == OperandType.I32) + { + pageOffset = context.ZeroExtend32(OperandType.I64, pageOffset); + } + + return context.Add(pte, pageOffset); + } + + public static Operand EmitHostMappedPointer(ArmEmitterContext context, Operand address) + { + if (address.Type == OperandType.I32) + { + address = context.ZeroExtend32(OperandType.I64, address); + } + + if (context.Memory.Type == MemoryManagerType.HostMapped) + { + Operand mask = Const(ulong.MaxValue >> (64 - context.Memory.AddressSpaceBits)); + address = context.BitwiseAnd(address, mask); + } + + Operand baseAddr = !context.HasPtc + ? Const(context.Memory.PageTablePointer.ToInt64()) + : Const(context.Memory.PageTablePointer.ToInt64(), Ptc.PageTableSymbol); + + return context.Add(baseAddr, address); + } + + private static void EmitReadIntFallback(ArmEmitterContext context, Operand address, int rt, int size) + { + SetInt(context, rt, EmitReadIntFallback(context, address, size)); + } + + private static Operand EmitReadIntFallback(ArmEmitterContext context, Operand address, int size) + { + MethodInfo info = null; + + switch (size) + { + case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)); break; + case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)); break; + case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)); break; + case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)); break; + } + + return context.Call(info, address); + } + + private static void EmitReadVectorFallback( + ArmEmitterContext context, + Operand address, + Operand vector, + int rt, + int elem, + int size) + { + MethodInfo info = null; + + switch (size) + { + case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte)); break; + case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16)); break; + case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32)); break; + case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64)); break; + case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128)); break; + } + + Operand value = context.Call(info, address); + + switch (size) + { + case 0: value = context.VectorInsert8 (vector, value, elem); break; + case 1: value = context.VectorInsert16(vector, value, elem); break; + case 2: value = context.VectorInsert (vector, value, elem); break; + case 3: value = context.VectorInsert (vector, value, elem); break; + } + + context.Copy(GetVec(rt), value); + } + + private static void EmitWriteIntFallback(ArmEmitterContext context, Operand address, int rt, int size) + { + MethodInfo info = null; + + switch (size) + { + case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)); break; + case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)); break; + case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)); break; + case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)); break; + } + + Operand value = GetInt(context, rt); + + if (size < 3 && value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + + context.Call(info, 
address, value); + } + + private static void EmitWriteVectorFallback( + ArmEmitterContext context, + Operand address, + int rt, + int elem, + int size) + { + MethodInfo info = null; + + switch (size) + { + case 0: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte)); break; + case 1: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16)); break; + case 2: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32)); break; + case 3: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64)); break; + case 4: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128)); break; + } + + Operand value = default; + + if (size < 4) + { + switch (size) + { + case 0: value = context.VectorExtract8 (GetVec(rt), elem); break; + case 1: value = context.VectorExtract16(GetVec(rt), elem); break; + case 2: value = context.VectorExtract (OperandType.I32, GetVec(rt), elem); break; + case 3: value = context.VectorExtract (OperandType.I64, GetVec(rt), elem); break; + } + } + else + { + value = GetVec(rt); + } + + context.Call(info, address, value); + } + + private static Operand GetInt(ArmEmitterContext context, int rt) + { + return context.CurrOp is OpCode32 ? GetIntA32(context, rt) : GetIntOrZR(context, rt); + } + + private static void SetInt(ArmEmitterContext context, int rt, Operand value) + { + if (context.CurrOp is OpCode32) + { + SetIntA32(context, rt, value); + } + else + { + SetIntOrZR(context, rt, value); + } + } + + // ARM32 helpers. + public static Operand GetMemM(ArmEmitterContext context, bool setCarry = true) + { + switch (context.CurrOp) + { + case IOpCode32MemRsImm op: return GetMShiftedByImmediate(context, op, setCarry); + + case IOpCode32MemReg op: return GetIntA32(context, op.Rm); + + case IOpCode32Mem op: return Const(op.Immediate); + + case OpCode32SimdMemImm op: return Const(op.Immediate); + + default: throw InvalidOpCodeType(context.CurrOp); + } + } + + private static Exception InvalidOpCodeType(OpCode opCode) + { + return new InvalidOperationException($"Invalid OpCode type \"{opCode?.GetType().Name ?? 
"null"}\"."); + } + + public static Operand GetMShiftedByImmediate(ArmEmitterContext context, IOpCode32MemRsImm op, bool setCarry) + { + Operand m = GetIntA32(context, op.Rm); + + int shift = op.Immediate; + + if (shift == 0) + { + switch (op.ShiftType) + { + case ShiftType.Lsr: shift = 32; break; + case ShiftType.Asr: shift = 32; break; + case ShiftType.Ror: shift = 1; break; + } + } + + if (shift != 0) + { + setCarry &= false; + + switch (op.ShiftType) + { + case ShiftType.Lsl: m = InstEmitAluHelper.GetLslC(context, m, setCarry, shift); break; + case ShiftType.Lsr: m = InstEmitAluHelper.GetLsrC(context, m, setCarry, shift); break; + case ShiftType.Asr: m = InstEmitAluHelper.GetAsrC(context, m, setCarry, shift); break; + case ShiftType.Ror: + if (op.Immediate != 0) + { + m = InstEmitAluHelper.GetRorC(context, m, setCarry, shift); + } + else + { + m = InstEmitAluHelper.GetRrxC(context, m, setCarry); + } + break; + } + } + + return m; + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitMove.cs b/src/ARMeilleure/Instructions/InstEmitMove.cs new file mode 100644 index 00000000..d551bf2d --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMove.cs @@ -0,0 +1,41 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Movk(ArmEmitterContext context) + { + OpCodeMov op = (OpCodeMov)context.CurrOp; + + OperandType type = op.GetOperandType(); + + Operand res = GetIntOrZR(context, op.Rd); + + res = context.BitwiseAnd(res, Const(type, ~(0xffffL << op.Bit))); + + res = context.BitwiseOr(res, Const(type, op.Immediate)); + + SetIntOrZR(context, op.Rd, res); + } + + public static void Movn(ArmEmitterContext context) + { + OpCodeMov op = (OpCodeMov)context.CurrOp; + + SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), ~op.Immediate)); + } + + public static void Movz(ArmEmitterContext context) + { + OpCodeMov op = (OpCodeMov)context.CurrOp; + + SetIntOrZR(context, op.Rd, Const(op.GetOperandType(), op.Immediate)); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitMul.cs b/src/ARMeilleure/Instructions/InstEmitMul.cs new file mode 100644 index 00000000..65d11b30 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMul.cs @@ -0,0 +1,100 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Madd(ArmEmitterContext context) => EmitMul(context, isAdd: true); + public static void Msub(ArmEmitterContext context) => EmitMul(context, isAdd: false); + + private static void EmitMul(ArmEmitterContext context, bool isAdd) + { + OpCodeMul op = (OpCodeMul)context.CurrOp; + + Operand a = GetIntOrZR(context, op.Ra); + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand res = context.Multiply(n, m); + + res = isAdd ? context.Add(a, res) : context.Subtract(a, res); + + SetIntOrZR(context, op.Rd, res); + } + + public static void Smaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedAdd); + public static void Smsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.SignedSubtract); + public static void Umaddl(ArmEmitterContext context) => EmitMull(context, MullFlags.Add); + public static void Umsubl(ArmEmitterContext context) => EmitMull(context, MullFlags.Subtract); + + [Flags] + private enum MullFlags + { + Subtract = 0, + Add = 1 << 0, + Signed = 1 << 1, + + SignedAdd = Signed | Add, + SignedSubtract = Signed | Subtract + } + + private static void EmitMull(ArmEmitterContext context, MullFlags flags) + { + OpCodeMul op = (OpCodeMul)context.CurrOp; + + Operand GetExtendedRegister32(int index) + { + Operand value = GetIntOrZR(context, index); + + if ((flags & MullFlags.Signed) != 0) + { + return context.SignExtend32(value.Type, value); + } + else + { + return context.ZeroExtend32(value.Type, value); + } + } + + Operand a = GetIntOrZR(context, op.Ra); + + Operand n = GetExtendedRegister32(op.Rn); + Operand m = GetExtendedRegister32(op.Rm); + + Operand res = context.Multiply(n, m); + + res = (flags & MullFlags.Add) != 0 ? context.Add(a, res) : context.Subtract(a, res); + + SetIntOrZR(context, op.Rd, res); + } + + public static void Smulh(ArmEmitterContext context) + { + OpCodeMul op = (OpCodeMul)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand d = context.Multiply64HighSI(n, m); + + SetIntOrZR(context, op.Rd, d); + } + + public static void Umulh(ArmEmitterContext context) + { + OpCodeMul op = (OpCodeMul)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + Operand m = GetIntOrZR(context, op.Rm); + + Operand d = context.Multiply64HighUI(n, m); + + SetIntOrZR(context, op.Rd, d); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitMul32.cs b/src/ARMeilleure/Instructions/InstEmitMul32.cs new file mode 100644 index 00000000..0822f92c --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitMul32.cs @@ -0,0 +1,379 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitAluHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + [Flags] + private enum MullFlags + { + Subtract = 1, + Add = 1 << 1, + Signed = 1 << 2, + + SignedAdd = Signed | Add, + SignedSubtract = Signed | Subtract + } + + public static void Mla(ArmEmitterContext context) + { + IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand a = GetIntA32(context, op.Ra); + + Operand res = context.Add(a, context.Multiply(n, m)); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + } + + EmitAluStore(context, res); + } + + public static void Mls(ArmEmitterContext context) + { + IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp; + + Operand n = GetAluN(context); + Operand m = GetAluM(context); + Operand a = GetIntA32(context, op.Ra); + + Operand res = context.Subtract(a, context.Multiply(n, m)); + + EmitAluStore(context, res); + } + + public static void Smmla(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.SignedAdd); + } + + public static void Smmls(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.SignedSubtract); + } + + public static void Smmul(ArmEmitterContext context) + { + EmitSmmul(context, MullFlags.Signed); + } + + private static void EmitSmmul(ArmEmitterContext context, MullFlags flags) + { + IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp; + + Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + if (flags.HasFlag(MullFlags.Add) && op.Ra != 0xf) + { + res = context.Add(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res); + } + else if (flags.HasFlag(MullFlags.Subtract)) + { + res = context.Subtract(context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Ra)), Const(32)), res); + } + + if (op.R) + { + res = context.Add(res, Const(0x80000000L)); + } + + Operand hi = context.ConvertI64ToI32(context.ShiftRightSI(res, Const(32))); + + EmitGenericAluStoreA32(context, op.Rd, false, hi); + } + + public static void Smla__(ArmEmitterContext context) + { + IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + Operand a = GetIntA32(context, op.Ra); + + if (op.NHigh) + { + n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16))); + } + else + { + n = context.SignExtend16(OperandType.I64, n); + } + + if (op.MHigh) + { + m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16))); + } + else + { + m = context.SignExtend16(OperandType.I64, m); + } + + Operand res = context.Multiply(n, m); + + Operand toAdd = context.SignExtend32(OperandType.I64, a); + res = context.Add(res, toAdd); + Operand q = context.ICompareNotEqual(res, 
context.SignExtend32(OperandType.I64, res)); + res = context.ConvertI64ToI32(res); + + UpdateQFlag(context, q); + + EmitGenericAluStoreA32(context, op.Rd, false, res); + } + + public static void Smlal(ArmEmitterContext context) + { + EmitMlal(context, true); + } + + public static void Smlal__(ArmEmitterContext context) + { + IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.NHigh) + { + n = context.SignExtend16(OperandType.I64, context.ShiftRightUI(n, Const(16))); + } + else + { + n = context.SignExtend16(OperandType.I64, n); + } + + if (op.MHigh) + { + m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16))); + } + else + { + m = context.SignExtend16(OperandType.I64, m); + } + + Operand res = context.Multiply(n, m); + + Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32)); + toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo))); + res = context.Add(res, toAdd); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + EmitGenericAluStoreA32(context, op.RdHi, false, hi); + EmitGenericAluStoreA32(context, op.RdLo, false, lo); + } + + public static void Smlaw_(ArmEmitterContext context) + { + IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + Operand a = GetIntA32(context, op.Ra); + + if (op.MHigh) + { + m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16))); + } + else + { + m = context.SignExtend16(OperandType.I64, m); + } + + Operand res = context.Multiply(context.SignExtend32(OperandType.I64, n), m); + + Operand toAdd = context.ShiftLeft(context.SignExtend32(OperandType.I64, a), Const(16)); + res = context.Add(res, toAdd); + res = context.ShiftRightSI(res, Const(16)); + Operand q = context.ICompareNotEqual(res, context.SignExtend32(OperandType.I64, res)); + res = context.ConvertI64ToI32(res); + + UpdateQFlag(context, q); + + EmitGenericAluStoreA32(context, op.Rd, false, res); + } + + public static void Smul__(ArmEmitterContext context) + { + IOpCode32AluMla op = (IOpCode32AluMla)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.NHigh) + { + n = context.ShiftRightSI(n, Const(16)); + } + else + { + n = context.SignExtend16(OperandType.I32, n); + } + + if (op.MHigh) + { + m = context.ShiftRightSI(m, Const(16)); + } + else + { + m = context.SignExtend16(OperandType.I32, m); + } + + Operand res = context.Multiply(n, m); + + EmitGenericAluStoreA32(context, op.Rd, false, res); + } + + public static void Smull(ArmEmitterContext context) + { + IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp; + + Operand n = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.SignExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi); + EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo); + } + + public static void Smulw_(ArmEmitterContext context) + { + IOpCode32AluMla op = 
(IOpCode32AluMla)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (op.MHigh) + { + m = context.SignExtend16(OperandType.I64, context.ShiftRightUI(m, Const(16))); + } + else + { + m = context.SignExtend16(OperandType.I64, m); + } + + Operand res = context.Multiply(context.SignExtend32(OperandType.I64, n), m); + + res = context.ShiftRightUI(res, Const(16)); + res = context.ConvertI64ToI32(res); + + EmitGenericAluStoreA32(context, op.Rd, false, res); + } + + public static void Umaal(ArmEmitterContext context) + { + IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp; + + Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + Operand dHi = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)); + Operand dLo = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo)); + + Operand res = context.Multiply(n, m); + res = context.Add(res, dHi); + res = context.Add(res, dLo); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + EmitGenericAluStoreA32(context, op.RdHi, false, hi); + EmitGenericAluStoreA32(context, op.RdLo, false, lo); + } + + public static void Umlal(ArmEmitterContext context) + { + EmitMlal(context, false); + } + + public static void Umull(ArmEmitterContext context) + { + IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp; + + Operand n = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rn)); + Operand m = context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.Rm)); + + Operand res = context.Multiply(n, m); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi); + EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo); + } + + private static void EmitMlal(ArmEmitterContext context, bool signed) + { + IOpCode32AluUmull op = (IOpCode32AluUmull)context.CurrOp; + + Operand n = GetIntA32(context, op.Rn); + Operand m = GetIntA32(context, op.Rm); + + if (signed) + { + n = context.SignExtend32(OperandType.I64, n); + m = context.SignExtend32(OperandType.I64, m); + } + else + { + n = context.ZeroExtend32(OperandType.I64, n); + m = context.ZeroExtend32(OperandType.I64, m); + } + + Operand res = context.Multiply(n, m); + + Operand toAdd = context.ShiftLeft(context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdHi)), Const(32)); + toAdd = context.BitwiseOr(toAdd, context.ZeroExtend32(OperandType.I64, GetIntA32(context, op.RdLo))); + res = context.Add(res, toAdd); + + Operand hi = context.ConvertI64ToI32(context.ShiftRightUI(res, Const(32))); + Operand lo = context.ConvertI64ToI32(res); + + if (ShouldSetFlags(context)) + { + EmitNZFlagsCheck(context, res); + } + + EmitGenericAluStoreA32(context, op.RdHi, ShouldSetFlags(context), hi); + EmitGenericAluStoreA32(context, op.RdLo, ShouldSetFlags(context), lo); + } + + private static void UpdateQFlag(ArmEmitterContext context, Operand q) + { + Operand lblSkipSetQ = Label(); + + context.BranchIfFalse(lblSkipSetQ, q); + + SetFlag(context, PState.QFlag, Const(1)); + + context.MarkLabel(lblSkipSetQ); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs new 
file mode 100644 index 00000000..7e7f26b1 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic.cs @@ -0,0 +1,5224 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h +// https://www.agner.org/optimize/#vectorclass @ vectori128.h + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { + public static void Abs_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AbsS); + } + else + { + EmitScalarUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + } + + public static void Abs_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AbsV); + } + else + { + EmitVectorUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + } + + public static void Add_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64AddS); + } + else + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Add_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + Operand res = context.AddIntrinsic(addInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Addhn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64AddhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: false); + } + } + + public static void Addp_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64AddpS); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne0 = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand ne1 = EmitVectorExtractZx(context, op.Rn, 1, op.Size); + + Operand res = context.Add(ne0, ne1); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, op.Size)); + } + } + + public static void Addp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AddpV); + } + else if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp(context, X86PaddInstruction); + } + else + { + EmitVectorPairwiseOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Addv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + 
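+ // On Arm64 hosts this lowers to the native ADDV instruction, which sums every lane of the vector into a single element.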
InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64AddvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Cls_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClsV); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int eSize = 8 << op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns)), ne, Const(eSize)); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Clz_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64ClzV); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int eSize = 8 << op.Size; + + Operand res = eSize switch { + 8 => Clz_V_I8 (context, GetVec(op.Rn)), + 16 => Clz_V_I16(context, GetVec(op.Rn)), + 32 => Clz_V_I32(context, GetVec(op.Rn)), + _ => default + }; + + if (res != default) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + } + else + { + int elems = op.GetBytesCount() >> op.Size; + + res = context.VectorZero(); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand de = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros)), ne, Const(eSize)); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static Operand Clz_V_I8(ArmEmitterContext context, Operand arg) + { + if (!Optimizations.UseSsse3) + { + return default; + } + + // CLZ nibble table. + Operand clzTable = X86GetScalar(context, 0x01_01_01_01_02_02_03_04); + + Operand maskLow = X86GetAllElements(context, 0x0f_0f_0f_0f); + Operand c04 = X86GetAllElements(context, 0x04_04_04_04); + + // CLZ of low 4 bits of elements in arg. + Operand loClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, arg); + + // Get the high 4 bits of elements in arg. + Operand hiArg = context.AddIntrinsic(Intrinsic.X86Psrlw, arg, Const(4)); + hiArg = context.AddIntrinsic(Intrinsic.X86Pand, hiArg, maskLow); + + // CLZ of high 4 bits of elements in arg. + Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, clzTable, hiArg); + + // If high 4 bits are not all zero, we discard the CLZ of the low 4 bits. + Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqb, hiClz, c04); + loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask); + + return context.AddIntrinsic(Intrinsic.X86Paddb, loClz, hiClz); + } + + private static Operand Clz_V_I16(ArmEmitterContext context, Operand arg) + { + if (!Optimizations.UseSsse3) + { + return default; + } + + Operand maskSwap = X86GetElements(context, 0x80_0f_80_0d_80_0b_80_09, 0x80_07_80_05_80_03_80_01); + Operand maskLow = X86GetAllElements(context, 0x00ff_00ff); + Operand c0008 = X86GetAllElements(context, 0x0008_0008); + + // CLZ pair of high 8 and low 8 bits of elements in arg. + Operand hiloClz = Clz_V_I8(context, arg); + // Get CLZ of low 8 bits in each pair. 
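+ // (Each 16-bit lane of hiloClz holds two per-byte counts of 0..8; e.g. a lane of 0x00ff yields hiClz = 8 and loClz = 0 below, combining to the correct count of 8.)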
+ Operand loClz = context.AddIntrinsic(Intrinsic.X86Pand, hiloClz, maskLow); + // Get CLZ of high 8 bits in each pair. + Operand hiClz = context.AddIntrinsic(Intrinsic.X86Pshufb, hiloClz, maskSwap); + + // If high 8 bits are not all zero, we discard the CLZ of the low 8 bits. + Operand mask = context.AddIntrinsic(Intrinsic.X86Pcmpeqw, hiClz, c0008); + loClz = context.AddIntrinsic(Intrinsic.X86Pand, loClz, mask); + + return context.AddIntrinsic(Intrinsic.X86Paddw, loClz, hiClz); + } + + private static Operand Clz_V_I32(ArmEmitterContext context, Operand arg) + { + // TODO: Use vplzcntd when AVX-512 is supported. + if (!Optimizations.UseSse2) + { + return default; + } + + Operand AddVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Paddd, op0, op1); + Operand SubVectorI32(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Psubd, op0, op1); + Operand ShiftRightVectorUI32(Operand op0, int imm8) => context.AddIntrinsic(Intrinsic.X86Psrld, op0, Const(imm8)); + Operand OrVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Por, op0, op1); + Operand AndVector(Operand op0, Operand op1) => context.AddIntrinsic(Intrinsic.X86Pand, op0, op1); + Operand NotVector(Operand op0) => context.AddIntrinsic(Intrinsic.X86Pandn, op0, context.VectorOne()); + + Operand c55555555 = X86GetAllElements(context, 0x55555555); + Operand c33333333 = X86GetAllElements(context, 0x33333333); + Operand c0f0f0f0f = X86GetAllElements(context, 0x0f0f0f0f); + Operand c0000003f = X86GetAllElements(context, 0x0000003f); + + Operand tmp0; + Operand tmp1; + Operand res; + + // Set all bits after highest set bit to 1. + res = OrVector(ShiftRightVectorUI32(arg, 1), arg); + res = OrVector(ShiftRightVectorUI32(res, 2), res); + res = OrVector(ShiftRightVectorUI32(res, 4), res); + res = OrVector(ShiftRightVectorUI32(res, 8), res); + res = OrVector(ShiftRightVectorUI32(res, 16), res); + + // Make leading 0s into leading 1s. + res = NotVector(res); + + // Count leading 1s, which is the population count. + tmp0 = ShiftRightVectorUI32(res, 1); + tmp0 = AndVector(tmp0, c55555555); + res = SubVectorI32(res, tmp0); + + tmp0 = ShiftRightVectorUI32(res, 2); + tmp0 = AndVector(tmp0, c33333333); + tmp1 = AndVector(res, c33333333); + res = AddVectorI32(tmp0, tmp1); + + tmp0 = ShiftRightVectorUI32(res, 4); + tmp0 = AddVectorI32(tmp0, res); + res = AndVector(tmp0, c0f0f0f0f); + + tmp0 = ShiftRightVectorUI32(res, 8); + res = AddVectorI32(tmp0, res); + + tmp0 = ShiftRightVectorUI32(res, 16); + res = AddVectorI32(tmp0, res); + + res = AndVector(res, c0000003f); + + return res; + } + + public static void Cnt_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64CntV); + } + else + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 
16 : 8; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + + Operand de; + + if (Optimizations.UsePopCnt) + { + de = context.AddIntrinsicLong(Intrinsic.X86Popcnt, ne); + } + else + { + de = EmitCountSetBits8(context, ne); + } + + res = EmitVectorInsert(context, res, de, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fabd_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FabdS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Subss, GetVec(op.Rn), GetVec(op.Rm)); + + res = EmitFloatAbs(context, res, true, false); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Subsd, GetVec(op.Rn), GetVec(op.Rm)); + + res = EmitFloatAbs(context, res, false, false); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); + + return EmitUnaryMathCall(context, nameof(Math.Abs), res); + }); + } + } + + public static void Fabd_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FabdV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Subps, GetVec(op.Rn), GetVec(op.Rm)); + + res = EmitFloatAbs(context, res, true, true); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Subpd, GetVec(op.Rn), GetVec(op.Rm)); + + res = EmitFloatAbs(context, res, false, true); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); + + return EmitUnaryMathCall(context, nameof(Math.Abs), res); + }); + } + } + + public static void Fabs_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FabsS); + } + else if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0) + { + Operand res = EmitFloatAbs(context, GetVec(op.Rn), true, false); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, false); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Abs), op1); + }); + } + } + + public static void Fabs_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FabsV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = EmitFloatAbs(context, 
GetVec(op.Rn), true, true); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = EmitFloatAbs(context, GetVec(op.Rn), false, true); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Abs), op1); + }); + } + } + + public static void Fadd_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Addss, Intrinsic.X86Addsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); + }); + } + } + + public static void Fadd_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); + }); + } + } + + public static void Faddp_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FaddpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if ((op.Size & 1) == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Haddps, GetVec(op.Rn), GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if ((op.Size & 1) == 1) */ + { + Operand res = context.AddIntrinsic(Intrinsic.X86Haddpd, GetVec(op.Rn), GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); + }); + } + } + + public static void Faddp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FaddpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + Intrinsic addInst = (op.Size & 1) == 0 ? 
Intrinsic.X86Addps : Intrinsic.X86Addpd; + + return context.AddIntrinsic(addInst, op1, op2); + }, scalar: false, op1, op2); + }); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2); + }); + } + } + + public static void Fdiv_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FdivS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Divide(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2); + }); + } + } + + public static void Fdiv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FdivV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Divps, Intrinsic.X86Divpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Divide(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2); + }); + } + } + + public static void Fmadd_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand a = GetVec(op.Ra); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res; + + if (op.Size == 0) + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, a, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + res = context.AddIntrinsic(Intrinsic.X86Addss, a, res); + } + + context.Copy(d, context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, a, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Addsd, a, res); + } + + context.Copy(d, context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryRaOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + + public static void Fmax_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); + }, scalar: true); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); + }); + } + } + + public static void Fmax_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, 
op1, op2, isMax: true); + }, scalar: false); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); + }); + } + } + + public static void Fmaxnm_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmaxnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + } + + public static void Fmaxnm_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + } + + public static void Fmaxnmp_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FmaxnmpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2ScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: true, op1, op2); + }); + } + else + { + EmitScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + } + + public static void Fmaxnmp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxnmpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2); + }); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + } + + public static void Fmaxnmv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FmaxnmvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: true, scalar: false, op1, op2); + }); + } + else + { + EmitVectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2); + }); + } + } + + public static void Fmaxp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmaxpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); + }, scalar: false, op1, op2); + }); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); + }); + } + } + + public static void Fmaxv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, 
Intrinsic.Arm64FmaxvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: true); + }, scalar: false, op1, op2); + }); + } + else + { + EmitVectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMax), op1, op2); + }); + } + } + + public static void Fmin_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: true); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); + }); + } + } + + public static void Fmin_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: false); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); + }); + } + } + + public static void Fminnm_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FminnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); + }); + } + } + + public static void Fminnm_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); + }); + } + } + + public static void Fminnmp_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FminnmpS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2ScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: true, op1, op2); + }); + } + else + { + EmitScalarPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); + }); + } + } + + public static void Fminnmp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminnmpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2); + }); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); 
+ }); + } + } + + public static void Fminnmv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminnmvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSse41MaxMinNumOpF(context, isMaxNum: false, scalar: false, op1, op2); + }); + } + else + { + EmitVectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2); + }); + } + } + + public static void Fminp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FminpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorPairwiseOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: false, op1, op2); + }); + } + else + { + EmitVectorPairwiseOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); + }); + } + } + + public static void Fminv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FminvV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse2VectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: false); + }, scalar: false, op1, op2); + }); + } + else + { + EmitVectorAcrossVectorOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMin), op1, op2); + }); + } + } + + public static void Fmla_Se(ArmEmitterContext context) // Fused. + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaSe); + } + else if (Optimizations.UseFma) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ss, d, n, res); + + context.Copy(d, context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Vfmadd231sd, d, n, res); + + context.Copy(d, context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Fmla_V(ArmEmitterContext context) // Fused. 
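+ // Per-lane Vd += Vn * Vm. "Fused" means the intermediate product is not rounded; the non-FMA SSE2 fallback below does round it, hence the FastFP gate on that path.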
+ { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + Operand res; + + if (sizeF == 0) + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); + res = context.AddIntrinsic(Intrinsic.X86Addps, d, res); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else /* if (sizeF == 1) */ + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res); + } + + context.Copy(d, res); + } + } + else + { + EmitVectorTernaryOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + + public static void Fmla_Ve(ArmEmitterContext context) // Fused. + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlaVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfmadd231ps, d, n, res); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); + res = context.AddIntrinsic(Intrinsic.X86Addps, d, res); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfmadd231pd, d, n, res); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); + res = context.AddIntrinsic(Intrinsic.X86Addpd, d, res); + } + + context.Copy(d, res); + } + } + else + { + EmitVectorTernaryOpByElemF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + + public static void Fmls_Se(ArmEmitterContext context) // Fused. 
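+ // Scalar by-element fused multiply-subtract: d -= n * m[index]. The element is broadcast with SHUFPS/SHUFPD, then vfnmadd231ss/sd computes d - n*m in one rounding.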
+ { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsSe); + } + else if (Optimizations.UseFma) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, d, n, res); + + context.Copy(d, context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, d, n, res); + + context.Copy(d, context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryOpByElemF(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Fmls_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRd(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + Operand res; + + if (sizeF == 0) + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subps, d, res); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else /* if (sizeF == 1) */ + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res); + } + + context.Copy(d, res); + } + } + else + { + EmitVectorTernaryOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + }); + } + } + + public static void Fmls_Ve(ArmEmitterContext context) // Fused. 
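+ // Vector by-element form of FMLS; without FMA it falls back to mul + sub, which is not bit-identical to a truly fused result.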
+ { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpFRdByElem(context, Intrinsic.Arm64FmlsVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, d, n, res); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); + res = context.AddIntrinsic(Intrinsic.X86Subps, d, res); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, d, n, res); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); + res = context.AddIntrinsic(Intrinsic.X86Subpd, d, res); + } + + context.Copy(d, res); + } + } + else + { + EmitVectorTernaryOpByElemF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + }); + } + } + + public static void Fmsub_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand a = GetVec(op.Ra); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res; + + if (op.Size == 0) + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, a, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subss, a, res); + } + + context.Copy(d, context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, a, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subsd, a, res); + } + + context.Copy(d, context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryRaOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + }); + } + } + + public static void Fmul_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); + }); + } + } + + public static void Fmul_Se(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulSe); + } + else + { 
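+ // Unlike Fmul_S, this by-element scalar multiply has no soft-float branch; the product is emitted directly as an IR multiply.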
+ EmitScalarBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Fmul_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); + }); + } + } + + public static void Fmul_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulVe); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + int shuffleMask = op.Index | op.Index << 2 | op.Index << 4 | op.Index << 6; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + int shuffleMask = op.Index | op.Index << 1; + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufpd, m, m, Const(shuffleMask)); + + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, res); + + context.Copy(GetVec(op.Rd), res); + } + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); + }); + } + } + + public static void Fmulx_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FmulxS); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } + } + + public static void Fmulx_Se(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpFByElem(context, Intrinsic.Arm64FmulxSe); + } + else + { + EmitScalarBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } + } + + public static void Fmulx_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FmulxV); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } + } + + public static void Fmulx_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpFByElem(context, Intrinsic.Arm64FmulxVe); + } + else + { + EmitVectorBinaryOpByElemF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulX), op1, op2); + }); + } + } + + public static void Fneg_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FnegS); + } + else if 
(Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0) + { + Operand mask = X86GetScalar(context, -0f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + Operand mask = X86GetScalar(context, -0d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarUnaryOpF(context, (op1) => context.Negate(op1)); + } + } + + public static void Fneg_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FnegV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, -0f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorps, mask, GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, -0d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, GetVec(op.Rn)); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorUnaryOpF(context, (op1) => context.Negate(op1)); + } + } + + public static void Fnmadd_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand a = GetVec(op.Ra); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res; + + if (op.Size == 0) + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231ss, a, n, m); + } + else + { + Operand mask = X86GetScalar(context, -0f); + Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a); + + res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subss, aNeg, res); + } + + context.Copy(d, context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmsub231sd, a, n, m); + } + else + { + Operand mask = X86GetScalar(context, -0d); + Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a); + + res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subsd, aNeg, res); + } + + context.Copy(d, context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryRaOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3); + }); + } + } + + public static void Fnmsub_S(ArmEmitterContext context) // Fused. 
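+ // FNMSUB computes n * m - a. vfmsub231ss/sd yields that directly; the non-FMA path below negates a first by XORing in the sign bit (the -0.0 mask).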
+ { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarTernaryOpF(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand a = GetVec(op.Ra); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res; + + if (op.Size == 0) + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfmsub231ss, a, n, m); + } + else + { + Operand mask = X86GetScalar(context, -0f); + Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorps, mask, a); + + res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + res = context.AddIntrinsic(Intrinsic.X86Addss, aNeg, res); + } + + context.Copy(d, context.VectorZeroUpper96(res)); + } + else /* if (op.Size == 1) */ + { + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfmsub231sd, a, n, m); + } + else + { + Operand mask = X86GetScalar(context, -0d); + Operand aNeg = context.AddIntrinsic(Intrinsic.X86Xorpd, mask, a); + + res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Addsd, aNeg, res); + } + + context.Copy(d, context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarTernaryRaOpF(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3); + }); + } + } + + public static void Fnmul_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FnmulS); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } + } + + public static void Frecpe_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpeS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + { + Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpss, GetVec(op.Rn)), scalar: true); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1); + }); + } + } + + public static void Frecpe_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrecpeV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + { + Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rcpps, GetVec(op.Rn)), scalar: false); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipEstimate), op1); + }); + } + } + + public static void Frecps_S(ArmEmitterContext context) // Fused. 
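+ // FRECPS returns 2.0 - n * m, the fused Newton-Raphson reciprocal step; the 2.0 constant is the 'mask' operand below.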
+ { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrecpsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + Operand res; + + if (sizeF == 0) + { + Operand mask = X86GetScalar(context, 2f); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, mask, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subss, mask, res); + } + + res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetScalar(context, 2d); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, mask, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subsd, mask, res); + } + + res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: true, sizeF); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2); + }); + } + } + + public static void Frecps_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrecpsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + Operand res; + + if (sizeF == 0) + { + Operand mask = X86GetAllElements(context, 2f); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, mask, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subps, mask, res); + } + + res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = X86GetAllElements(context, 2d); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, mask, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subpd, mask, res); + } + + res = EmitSse41RecipStepSelectOpF(context, n, m, res, mask, scalar: false, sizeF); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStepFused), op1, op2); + }); + } + } + + public static void Frecpx_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrecpxS); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecpX), op1); + }); + } + } + + public static void Frinta_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintaS); + } + else if (Optimizations.UseSse41) + { +
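+ // FRINTA is round-to-nearest with ties away from zero. SSE4.1 ROUNDSS immediates only offer ties-to-even, so ToNearestAway presumably gets dedicated lowering inside the helper.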
EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearestAway); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } + } + + public static void Frinta_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintaV); + } + else if (Optimizations.UseSse41) + { + EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearestAway); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1); + }); + } + } + + public static void Frinti_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintiS); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } + } + + public static void Frinti_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintiV); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } + } + + public static void Frintm_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintmS); + } + else if (Optimizations.UseSse41) + { + EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Floor), op1); + }); + } + } + + public static void Frintm_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintmV); + } + else if (Optimizations.UseSse41) + { + EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsMinusInfinity); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Floor), op1); + }); + } + } + + public static void Frintn_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintnS); + } + else if (Optimizations.UseSse41) + { + EmitSse41ScalarRoundOpF(context, FPRoundingMode.ToNearest); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); + }); + } + } + + public static void Frintn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintnV); + } + else if (Optimizations.UseSse41) + { + EmitSse41VectorRoundOpF(context, FPRoundingMode.ToNearest); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundMathCall(context, MidpointRounding.ToEven, op1); + }); + } + } + + public static void Frintp_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintpS); + } + else if (Optimizations.UseSse41) + { + EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1); + }); + } + } + + public static void Frintp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintpV); + } + else if 
(Optimizations.UseSse41) + { + EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsPlusInfinity); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Ceiling), op1); + }); + } + } + + public static void Frintx_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintxS); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } + } + + public static void Frintx_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintxV); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } + } + + public static void Frintz_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrintzS); + } + else if (Optimizations.UseSse41) + { + EmitSse41ScalarRoundOpF(context, FPRoundingMode.TowardsZero); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Truncate), op1); + }); + } + } + + public static void Frintz_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrintzV); + } + else if (Optimizations.UseSse41) + { + EmitSse41VectorRoundOpF(context, FPRoundingMode.TowardsZero); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitUnaryMathCall(context, nameof(Math.Truncate), op1); + }); + } + } + + public static void Frsqrte_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FrsqrteS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + { + Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtss, GetVec(op.Rn)), scalar: true); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1); + }); + } + } + + public static void Frsqrte_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FrsqrteV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41 && sizeF == 0) + { + Operand res = EmitSse41Round32Exp8OpF(context, context.AddIntrinsic(Intrinsic.X86Rsqrtps, GetVec(op.Rn)), scalar: false); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtEstimate), op1); + }); + } + } + + public static void Frsqrts_S(ArmEmitterContext context) // Fused. 
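+ // FRSQRTS returns (3.0 - n * m) / 2.0, the fused Newton-Raphson reciprocal-square-root step; hence the 3.0 and 0.5 constants below.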
+ { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FrsqrtsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + Operand res; + + if (sizeF == 0) + { + Operand maskHalf = X86GetScalar(context, 0.5f); + Operand maskThree = X86GetScalar(context, 3f); + Operand maskOneHalf = X86GetScalar(context, 1.5f); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ss, maskThree, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subss, maskThree, res); + } + + res = context.AddIntrinsic(Intrinsic.X86Mulss, maskHalf, res); + res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + else /* if (sizeF == 1) */ + { + Operand maskHalf = X86GetScalar(context, 0.5d); + Operand maskThree = X86GetScalar(context, 3d); + Operand maskOneHalf = X86GetScalar(context, 1.5d); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231sd, maskThree, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subsd, maskThree, res); + } + + res = context.AddIntrinsic(Intrinsic.X86Mulsd, maskHalf, res); + res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: true, sizeF); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper64(res)); + } + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2); + }); + } + } + + public static void Frsqrts_V(ArmEmitterContext context) // Fused. 
+ { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, Intrinsic.Arm64FrsqrtsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int sizeF = op.Size & 1; + + Operand res; + + if (sizeF == 0) + { + Operand maskHalf = X86GetAllElements(context, 0.5f); + Operand maskThree = X86GetAllElements(context, 3f); + Operand maskOneHalf = X86GetAllElements(context, 1.5f); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231ps, maskThree, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res); + } + + res = context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res); + res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand maskHalf = X86GetAllElements(context, 0.5d); + Operand maskThree = X86GetAllElements(context, 3d); + Operand maskOneHalf = X86GetAllElements(context, 1.5d); + + if (Optimizations.UseFma) + { + res = context.AddIntrinsic(Intrinsic.X86Vfnmadd231pd, maskThree, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res); + } + + res = context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res); + res = EmitSse41RecipStepSelectOpF(context, n, m, res, maskOneHalf, scalar: false, sizeF); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStepFused), op1, op2); + }); + } + } + + public static void Fsqrt_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FsqrtS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarUnaryOpF(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); + } + else + { + EmitScalarUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1); + }); + } + } + + public static void Fsqrt_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FsqrtV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorUnaryOpF(context, Intrinsic.X86Sqrtps, Intrinsic.X86Sqrtpd); + } + else + { + EmitVectorUnaryOpF(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1); + }); + } + } + + public static void Fsub_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOpF(context, Intrinsic.Arm64FsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2)); + } + else + { + EmitScalarBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); + }); + } + } + + public static void Fsub_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpF(context, 
Intrinsic.Arm64FsubV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF(context, (op1, op2) => context.Subtract(op1, op2)); + } + else + { + EmitVectorBinaryOpF(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, op2); + }); + } + } + + public static void Mla_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlaV); + } + else if (Optimizations.UseSse41) + { + EmitSse41VectorMul_AddSub(context, AddSub.Add); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mla_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlaVe); + } + else + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mls_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64MlsV); + } + else if (Optimizations.UseSse41) + { + EmitSse41VectorMul_AddSub(context, AddSub.Subtract); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mls_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64MlsVe); + } + else + { + EmitVectorTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Mul_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64MulV); + } + else if (Optimizations.UseSse41) + { + EmitSse41VectorMul_AddSub(context, AddSub.None); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Mul_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64MulVe); + } + else + { + EmitVectorBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Neg_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOp(context, Intrinsic.Arm64NegS); + } + else + { + EmitScalarUnaryOpSx(context, (op1) => context.Negate(op1)); + } + } + + public static void Neg_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64NegV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + Operand res = context.AddIntrinsic(subInst, context.VectorZero(), GetVec(op.Rn)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpSx(context, (op1) => context.Negate(op1)); + } + } + + public static void Pmull_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + 
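+ // PMULL is a carry-less (polynomial over GF(2)) multiply: partial products combine with XOR instead of ADD, which is why the SSE4.1 fallback accumulates with PXOR.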
+ if (Optimizations.UseArm64Pmull) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64PmullV); + } + else if (Optimizations.UsePclmulqdq && op.Size == 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + int imm8 = op.RegisterSize == RegisterSize.Simd64 ? 0b0000_0000 : 0b0001_0001; + + Operand res = context.AddIntrinsic(Intrinsic.X86Pclmulqdq, n, m, Const(imm8)); + + context.Copy(GetVec(op.Rd), res); + } + else if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd64) + { + n = context.VectorZeroUpper64(n); + m = context.VectorZeroUpper64(m); + } + else /* if (op.RegisterSize == RegisterSize.Simd128) */ + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Operand res = context.VectorZero(); + + if (op.Size == 0) + { + n = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, n); + m = context.AddIntrinsic(Intrinsic.X86Pmovzxbw, m); + + for (int i = 0; i < 8; i++) + { + Operand mask = context.AddIntrinsic(Intrinsic.X86Psllw, n, Const(15 - i)); + mask = context.AddIntrinsic(Intrinsic.X86Psraw, mask, Const(15)); + + Operand tmp = context.AddIntrinsic(Intrinsic.X86Psllw, m, Const(i)); + tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask); + + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp); + } + } + else /* if (op.Size == 3) */ + { + Operand zero = context.VectorZero(); + + for (int i = 0; i < 64; i++) + { + Operand mask = context.AddIntrinsic(Intrinsic.X86Movlhps, n, n); + mask = context.AddIntrinsic(Intrinsic.X86Psllq, mask, Const(63 - i)); + mask = context.AddIntrinsic(Intrinsic.X86Psrlq, mask, Const(63)); + mask = context.AddIntrinsic(Intrinsic.X86Psubq, zero, mask); + + Operand tmp = EmitSse2Sll_128(context, m, i); + tmp = context.AddIntrinsic(Intrinsic.X86Pand, tmp, mask); + + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, tmp); + } + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res; + + if (op.Size == 0) + { + res = context.VectorZero(); + + int part = op.RegisterSize == RegisterSize.Simd64 ? 0 : 8; + + for (int index = 0; index < 8; index++) + { + Operand ne = context.VectorExtract8(n, part + index); + Operand me = context.VectorExtract8(m, part + index); + + Operand de = EmitPolynomialMultiply(context, ne, me, 8); + + res = EmitVectorInsert(context, res, de, index, 1); + } + } + else /* if (op.Size == 3) */ + { + int part = op.RegisterSize == RegisterSize.Simd64 ? 
0 : 1; + + Operand ne = context.VectorExtract(OperandType.I64, n, part); + Operand me = context.VectorExtract(OperandType.I64, m, part); + + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Raddhn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RaddhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Add(op1, op2), round: true); + } + } + + public static void Rsubhn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64RsubhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: true); + } + } + + public static void Saba_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabaV); + } + else + { + EmitVectorTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + } + + public static void Sabal_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SabalV); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + } + + public static void Sabd_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + EmitSse41VectorSabdOp(context, op, n, m, isLong: false); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Sabdl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SabdlV); + } + else if (Optimizations.UseSse41 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? 
Intrinsic.X86Pmovsxbw + : Intrinsic.X86Pmovsxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + EmitSse41VectorSabdOp(context, op, n, m, isLong: true); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Sadalp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64SadalpV); + } + else + { + EmitAddLongPairwise(context, signed: true, accumulate: true); + } + } + + public static void Saddl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddlV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Saddlp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlpV); + } + else + { + EmitAddLongPairwise(context, signed: true, accumulate: false); + } + } + + public static void Saddlv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SaddlvV); + } + else + { + EmitVectorLongAcrossVectorOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Saddw_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SaddwV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Shadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShaddV); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + Intrinsic shiftInst = op.Size == 1 ? 
Intrinsic.X86Psraw : Intrinsic.X86Psrad; + + res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return context.ShiftRightSI(context.Add(op1, op2), Const(1)); + }); + } + } + + public static void Shsub_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64ShsubV); + } + else if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, (int)(op.Size == 0 ? 0x80808080u : 0x80008000u)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + Operand nPlusMask = context.AddIntrinsic(addInst, n, mask); + Operand mPlusMask = context.AddIntrinsic(addInst, m, mask); + + Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, nPlusMask, mPlusMask); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, nPlusMask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + return context.ShiftRightSI(context.Subtract(op1, op2), Const(1)); + }); + } + } + + public static void Smax_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxsInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); + } + } + + public static void Smaxp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmaxpV); + } + else if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp(context, X86PmaxsInstruction); + } + else + { + EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); + } + } + + public static void Smaxv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SmaxvV); + } + else + { + EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: true)); + } + } + + public static void Smin_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic minInst = X86PminsInstruction[op.Size]; + + Operand res = context.AddIntrinsic(minInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = 
context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); + } + } + + public static void Sminp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SminpV); + } + else if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp(context, X86PminsInstruction); + } + else + { + EmitVectorPairwiseOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); + } + } + + public static void Sminv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64SminvV); + } + else + { + EmitVectorAcrossVectorOpSx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: true)); + } + } + + public static void Smlal_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlalV); + } + else if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(addInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlal_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlalVe); + } + else + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlsl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SmlslV); + } + else if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovsxbw : Intrinsic.X86Pmovsxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? 
Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(subInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smlsl_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64SmlslVe); + } + else + { + EmitVectorWidenTernaryOpByElemSx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Smull_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SmullV); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Smull_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64SmullVe); + } + else + { + EmitVectorWidenBinaryOpByElemSx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Sqabs_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqabsS); + } + else + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + } + + public static void Sqabs_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqabsV); + } + else + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => EmitAbs(context, op1)); + } + } + + public static void Sqadd_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqaddS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + } + } + + public static void Sqadd_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqaddV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Add); + } + } + + public static void Sqdmulh_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } + } + + public static void Sqdmulh_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqdmulhV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } + } + + public static void Sqdmulh_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqdmulhVe); + } + else + { + EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: false)); + } + } + + public static void Sqneg_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + 
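+ // Saturating negate: negating the most negative representable value saturates to the maximum positive value and sets FPSR.QC.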
InstEmitSimdHelperArm64.EmitScalarSaturatingUnaryOp(context, Intrinsic.Arm64SqnegS); + } + else + { + EmitScalarSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } + } + + public static void Sqneg_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingUnaryOp(context, Intrinsic.Arm64SqnegV); + } + else + { + EmitVectorSaturatingUnaryOpSx(context, (op1) => context.Negate(op1)); + } + } + + public static void Sqrdmulh_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } + } + + public static void Sqrdmulh_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrdmulhV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } + } + + public static void Sqrdmulh_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpByElem(context, Intrinsic.Arm64SqrdmulhVe); + } + else + { + EmitVectorSaturatingBinaryOpByElemSx(context, (op1, op2) => EmitDoublingMultiplyHighHalf(context, op1, op2, round: true)); + } + } + + public static void Sqsub_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64SqsubS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + } + } + + public static void Sqsub_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqsubV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Sub); + } + } + + public static void Sqxtn_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxSx); + } + } + + public static void Sqxtn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtnV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxSx); + } + } + + public static void Sqxtun_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarSxZx); + } + } + + public static void Sqxtun_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SqxtunV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorSxZx); + } + } + + public static void Srhadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrhaddV); + } + else if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, 
(int)(op.Size == 0 ? 0x80808080u : 0x80008000u)); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + Operand nMinusMask = context.AddIntrinsic(subInst, n, mask); + Operand mMinusMask = context.AddIntrinsic(subInst, m, mask); + + Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, nMinusMask, mMinusMask); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, mask, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpSx(context, (op1, op2) => + { + Operand res = context.Add(op1, op2); + + res = context.Add(res, Const(1L)); + + return context.ShiftRightSI(res, Const(1)); + }); + } + } + + public static void Ssubl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsublV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Ssubw_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SsubwV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovsxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpSx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Sub_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SubS); + } + else + { + EmitScalarBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Sub_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SubV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Subhn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + 
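// SUBHN: widening subtract that keeps only the high half of each wide result (the fallback below uses EmitHighNarrow with round: false; RSUBHN is the rounding form). +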
InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64SubhnV); + } + else + { + EmitHighNarrow(context, (op1, op2) => context.Subtract(op1, op2), round: false); + } + } + + public static void Suqadd_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddS); + } + else + { + EmitScalarSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + } + } + + public static void Suqadd_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64SuqaddV); + } + else + { + EmitVectorSaturatingBinaryOpSx(context, flags: SaturatingFlags.Accumulate); + } + } + + public static void Uaba_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabaV); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + } + + public static void Uabal_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UabalV); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, EmitAbs(context, context.Subtract(op2, op3))); + }); + } + } + + public static void Uabd_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + EmitSse41VectorUabdOp(context, op, n, m, isLong: false); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Uabdl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UabdlV); + } + else if (Optimizations.UseSse41 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 + ? 
Intrinsic.X86Pmovzxbw + : Intrinsic.X86Pmovzxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + EmitSse41VectorUabdOp(context, op, n, m, isLong: true); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => + { + return EmitAbs(context, context.Subtract(op1, op2)); + }); + } + } + + public static void Uadalp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpRd(context, Intrinsic.Arm64UadalpV); + } + else + { + EmitAddLongPairwise(context, signed: false, accumulate: true); + } + } + + public static void Uaddl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddlV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uaddlp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlpV); + } + else + { + EmitAddLongPairwise(context, signed: false, accumulate: false); + } + } + + public static void Uaddlv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UaddlvV); + } + else + { + EmitVectorLongAcrossVectorOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uaddw_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UaddwV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(addInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Uhadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhaddV); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + Intrinsic shiftInst = op.Size == 1 ? 
Intrinsic.X86Psrlw : Intrinsic.X86Psrld; + + res2 = context.AddIntrinsic(shiftInst, res2, Const(1)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, res2); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.ShiftRightUI(context.Add(op1, op2), Const(1)); + }); + } + } + + public static void Uhsub_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UhsubV); + } + else if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic avgInst = op.Size == 0 ? Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, n, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.ShiftRightUI(context.Subtract(op1, op2), Const(1)); + }); + } + } + + public static void Umax_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); + } + } + + public static void Umaxp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmaxpV); + } + else if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp(context, X86PmaxuInstruction); + } + else + { + EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); + } + } + + public static void Umaxv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UmaxvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMax64Op(context, op1, op2, signed: false)); + } + } + + public static void Umin_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic minInst = X86PminuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(minInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); + } + } + + public static void Uminp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + 
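// UMINP: unsigned minimum of adjacent element pairs taken across the concatenation of the two source vectors. +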
InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UminpV); + } + else if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp(context, X86PminuInstruction); + } + else + { + EmitVectorPairwiseOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); + } + } + + public static void Uminv_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, Intrinsic.Arm64UminvV); + } + else + { + EmitVectorAcrossVectorOpZx(context, (op1, op2) => EmitMin64Op(context, op1, op2, signed: false)); + } + } + + public static void Umlal_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlalV); + } + else if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(addInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlal_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlalVe); + } + else + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlsl_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64UmlslV); + } + else if (Optimizations.UseSse41 && op.Size < 2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = op.Size == 0 ? Intrinsic.X86Pmovzxbw : Intrinsic.X86Pmovzxwd; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic mullInst = op.Size == 0 ? 
Intrinsic.X86Pmullw : Intrinsic.X86Pmulld; + + Operand res = context.AddIntrinsic(mullInst, n, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(d, context.AddIntrinsic(subInst, d, res)); + } + else + { + EmitVectorWidenRnRmTernaryOpZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umlsl_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRdByElem(context, Intrinsic.Arm64UmlslVe); + } + else + { + EmitVectorWidenTernaryOpByElemZx(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + } + + public static void Umull_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UmullV); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Umull_Ve(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpByElem(context, Intrinsic.Arm64UmullVe); + } + else + { + EmitVectorWidenBinaryOpByElemZx(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Uqadd_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqaddS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + } + + public static void Uqadd_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqaddV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Add); + } + } + + public static void Uqsub_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOp(context, Intrinsic.Arm64UqsubS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + } + + public static void Uqsub_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqsubV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Sub); + } + } + + public static void Uqxtn_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnS); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.ScalarZxZx); + } + } + + public static void Uqxtn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UqxtnV); + } + else + { + EmitSaturatingNarrowOp(context, SaturatingNarrowFlags.VectorZxZx); + } + } + + public static void Urhadd_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrhaddV); + } + else if (Optimizations.UseSse2 && op.Size < 2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic avgInst = op.Size == 0 ? 
Intrinsic.X86Pavgb : Intrinsic.X86Pavgw; + + Operand res = context.AddIntrinsic(avgInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + Operand res = context.Add(op1, op2); + + res = context.Add(res, Const(1L)); + + return context.ShiftRightUI(res, Const(1)); + }); + } + } + + public static void Usqadd_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddS); + } + else + { + EmitScalarSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + } + + public static void Usqadd_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOpRd(context, Intrinsic.Arm64UsqaddV); + } + else + { + EmitVectorSaturatingBinaryOpZx(context, SaturatingFlags.Accumulate); + } + } + + public static void Usubl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsublV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + n = context.AddIntrinsic(movInst, n); + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRnRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Usubw_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UsubwV); + } + else if (Optimizations.UseSse41) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + m = context.AddIntrinsic(Intrinsic.X86Psrldq, m, Const(8)); + } + + Intrinsic movInst = X86PmovzxInstruction[op.Size]; + + m = context.AddIntrinsic(movInst, m); + + Intrinsic subInst = X86PsubInstruction[op.Size + 1]; + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(subInst, n, m)); + } + else + { + EmitVectorWidenRmBinaryOpZx(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + private static Operand EmitAbs(ArmEmitterContext context, Operand value) + { + Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); + + return context.ConditionalSelect(isPositive, value, context.Negate(value)); + } + + private static void EmitAddLongPairwise(ArmEmitterContext context, bool signed, bool accumulate) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand ne1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand e = context.Add(ne0, ne1); + + if (accumulate) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + + e = 
context.Add(e, de); + } + + res = EmitVectorInsert(context, res, e, index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static Operand EmitDoublingMultiplyHighHalf( + ArmEmitterContext context, + Operand n, + Operand m, + bool round) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int eSize = 8 << op.Size; + + Operand res = context.Multiply(n, m); + + if (!round) + { + res = context.ShiftRightSI(res, Const(eSize - 1)); + } + else + { + long roundConst = 1L << (eSize - 1); + + res = context.ShiftLeft(res, Const(1)); + + res = context.Add(res, Const(roundConst)); + + res = context.ShiftRightSI(res, Const(eSize)); + + Operand isIntMin = context.ICompareEqual(res, Const((long)int.MinValue)); + + res = context.ConditionalSelect(isIntMin, context.Negate(res), res); + } + + return res; + } + + private static void EmitHighNarrow(ArmEmitterContext context, Func2I emit, bool round) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + int elems = 8 >> op.Size; + int eSize = 8 << op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand d = GetVec(op.Rd); + + Operand res = part == 0 ? context.VectorZero() : context.Copy(d); + + long roundConst = 1L << (eSize - 1); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size + 1); + + Operand de = emit(ne, me); + + if (round) + { + de = context.Add(de, Const(roundConst)); + } + + de = context.ShiftRightUI(de, Const(eSize)); + + res = EmitVectorInsert(context, res, de, part + index, op.Size); + } + + context.Copy(d, res); + } + + private static Operand EmitMax64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand cmp = signed + ? context.ICompareGreaterOrEqual (op1, op2) + : context.ICompareGreaterOrEqualUI(op1, op2); + + return context.ConditionalSelect(cmp, op1, op2); + } + + private static Operand EmitMin64Op(ArmEmitterContext context, Operand op1, Operand op2, bool signed) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand cmp = signed + ? context.ICompareLessOrEqual (op1, op2) + : context.ICompareLessOrEqualUI(op1, op2); + + return context.ConditionalSelect(cmp, op1, op2); + } + + private static void EmitSse41ScalarRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res; + + if (roundMode != FPRoundingMode.ToNearestAway) + { + Intrinsic inst = (op.Size & 1) != 0 ? Intrinsic.X86Roundsd : Intrinsic.X86Roundss; + + res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + } + else + { + res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: true); + } + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSse41VectorRoundOpF(ArmEmitterContext context, FPRoundingMode roundMode) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res; + + if (roundMode != FPRoundingMode.ToNearestAway) + { + Intrinsic inst = (op.Size & 1) != 0 ? 
Intrinsic.X86Roundpd : Intrinsic.X86Roundps; + + res = context.AddIntrinsic(inst, n, Const(X86GetRoundControl(roundMode))); + } + else + { + res = EmitSse41RoundToNearestWithTiesToAwayOpF(context, n, scalar: false); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static Operand EmitSse41Round32Exp8OpF(ArmEmitterContext context, Operand value, bool scalar) + { + Operand roundMask; + Operand truncMask; + Operand expMask; + + if (scalar) + { + roundMask = X86GetScalar(context, 0x4000); + truncMask = X86GetScalar(context, unchecked((int)0xFFFF8000)); + expMask = X86GetScalar(context, 0x7F800000); + } + else + { + roundMask = X86GetAllElements(context, 0x4000); + truncMask = X86GetAllElements(context, unchecked((int)0xFFFF8000)); + expMask = X86GetAllElements(context, 0x7F800000); + } + + Operand oValue = value; + Operand masked = context.AddIntrinsic(Intrinsic.X86Pand, value, expMask); + Operand isNaNInf = context.AddIntrinsic(Intrinsic.X86Pcmpeqd, masked, expMask); + + value = context.AddIntrinsic(Intrinsic.X86Paddd, value, roundMask); + value = context.AddIntrinsic(Intrinsic.X86Pand, value, truncMask); + + return context.AddIntrinsic(Intrinsic.X86Blendvps, value, oValue, isNaNInf); + } + + private static Operand EmitSse41RecipStepSelectOpF( + ArmEmitterContext context, + Operand n, + Operand m, + Operand res, + Operand mask, + bool scalar, + int sizeF) + { + Intrinsic cmpOp; + Intrinsic shlOp; + Intrinsic blendOp; + Operand zero = context.VectorZero(); + Operand expMask; + + if (sizeF == 0) + { + cmpOp = Intrinsic.X86Pcmpeqd; + shlOp = Intrinsic.X86Pslld; + blendOp = Intrinsic.X86Blendvps; + expMask = scalar ? X86GetScalar(context, 0x7F800000 << 1) : X86GetAllElements(context, 0x7F800000 << 1); + } + else /* if (sizeF == 1) */ + { + cmpOp = Intrinsic.X86Pcmpeqq; + shlOp = Intrinsic.X86Psllq; + blendOp = Intrinsic.X86Blendvpd; + expMask = scalar ? X86GetScalar(context, 0x7FF0000000000000L << 1) : X86GetAllElements(context, 0x7FF0000000000000L << 1); + } + + n = context.AddIntrinsic(shlOp, n, Const(1)); + m = context.AddIntrinsic(shlOp, m, Const(1)); + + Operand nZero = context.AddIntrinsic(cmpOp, n, zero); + Operand mZero = context.AddIntrinsic(cmpOp, m, zero); + Operand nInf = context.AddIntrinsic(cmpOp, n, expMask); + Operand mInf = context.AddIntrinsic(cmpOp, m, expMask); + + Operand nmZero = context.AddIntrinsic(Intrinsic.X86Por, nZero, mZero); + Operand nmInf = context.AddIntrinsic(Intrinsic.X86Por, nInf, mInf); + Operand nmZeroInf = context.AddIntrinsic(Intrinsic.X86Pand, nmZero, nmInf); + + return context.AddIntrinsic(blendOp, res, mask, nmZeroInf); + } + + public static void EmitSse2VectorIsNaNOpF( + ArmEmitterContext context, + Operand opF, + out Operand qNaNMask, + out Operand sNaNMask, + bool? isQNaN = null) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + if ((op.Size & 1) == 0) + { + const int QBit = 22; + + Operand qMask = X86GetAllElements(context, 1 << QBit); + + Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmpps, opF, opF, Const((int)CmpCondition.UnorderedQ)); + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); + mask2 = context.AddIntrinsic(Intrinsic.X86Cmpps, mask2, qMask, Const((int)CmpCondition.Equal)); + + qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andps, mask2, mask1) : default; + sNaNMask = isQNaN == null || !(bool)isQNaN ? 
context.AddIntrinsic(Intrinsic.X86Andnps, mask2, mask1) : default; + } + else /* if ((op.Size & 1) == 1) */ + { + const int QBit = 51; + + Operand qMask = X86GetAllElements(context, 1L << QBit); + + Operand mask1 = context.AddIntrinsic(Intrinsic.X86Cmppd, opF, opF, Const((int)CmpCondition.UnorderedQ)); + + Operand mask2 = context.AddIntrinsic(Intrinsic.X86Pand, opF, qMask); + mask2 = context.AddIntrinsic(Intrinsic.X86Cmppd, mask2, qMask, Const((int)CmpCondition.Equal)); + + qNaNMask = isQNaN == null || (bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andpd, mask2, mask1) : default; + sNaNMask = isQNaN == null || !(bool)isQNaN ? context.AddIntrinsic(Intrinsic.X86Andnpd, mask2, mask1) : default; + } + } + + public static Operand EmitSse41ProcessNaNsOpF( + ArmEmitterContext context, + Func2I emit, + bool scalar, + Operand n = default, + Operand m = default) + { + Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n; + Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m; + + EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out Operand nSNaNMask); + EmitSse2VectorIsNaNOpF(context, mCopy, out _, out Operand mSNaNMask, isQNaN: false); + + int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1; + + if (sizeF == 0) + { + const int QBit = 22; + + Operand qMask = scalar ? X86GetScalar(context, 1 << QBit) : X86GetAllElements(context, 1 << QBit); + + Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); + resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); + + Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, nCopy, resNaNMask); + resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); + + Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmpps, nCopy, mCopy, Const((int)CmpCondition.OrderedQ)); + + Operand res = context.AddIntrinsic(Intrinsic.X86Blendvps, resNaN, emit(nCopy, mCopy), resMask); + + if (n != default || m != default) + { + return res; + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); + + return default; + } + else /* if (sizeF == 1) */ + { + const int QBit = 51; + + Operand qMask = scalar ? X86GetScalar(context, 1L << QBit) : X86GetAllElements(context, 1L << QBit); + + Operand resNaNMask = context.AddIntrinsic(Intrinsic.X86Pandn, mSNaNMask, nQNaNMask); + resNaNMask = context.AddIntrinsic(Intrinsic.X86Por, resNaNMask, nSNaNMask); + + Operand resNaN = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, nCopy, resNaNMask); + resNaN = context.AddIntrinsic(Intrinsic.X86Por, resNaN, qMask); + + Operand resMask = context.AddIntrinsic(Intrinsic.X86Cmppd, nCopy, mCopy, Const((int)CmpCondition.OrderedQ)); + + Operand res = context.AddIntrinsic(Intrinsic.X86Blendvpd, resNaN, emit(nCopy, mCopy), resMask); + + if (n != default || m != default) + { + return res; + } + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); + + return default; + } + } + + private static Operand EmitSse2VectorMaxMinOpF(ArmEmitterContext context, Operand n, Operand m, bool isMax) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + if ((op.Size & 1) == 0) + { + Operand mask = X86GetAllElements(context, -0f); + + Operand res = context.AddIntrinsic(isMax ? 
Intrinsic.X86Maxps : Intrinsic.X86Minps, n, m); + res = context.AddIntrinsic(Intrinsic.X86Andnps, mask, res); + + Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m); + resSign = context.AddIntrinsic(Intrinsic.X86Andps, mask, resSign); + + return context.AddIntrinsic(Intrinsic.X86Por, res, resSign); + } + else /* if ((op.Size & 1) == 1) */ + { + Operand mask = X86GetAllElements(context, -0d); + + Operand res = context.AddIntrinsic(isMax ? Intrinsic.X86Maxpd : Intrinsic.X86Minpd, n, m); + res = context.AddIntrinsic(Intrinsic.X86Andnpd, mask, res); + + Operand resSign = context.AddIntrinsic(isMax ? Intrinsic.X86Pand : Intrinsic.X86Por, n, m); + resSign = context.AddIntrinsic(Intrinsic.X86Andpd, mask, resSign); + + return context.AddIntrinsic(Intrinsic.X86Por, res, resSign); + } + } + + private static Operand EmitSse41MaxMinNumOpF( + ArmEmitterContext context, + bool isMaxNum, + bool scalar, + Operand n = default, + Operand m = default) + { + Operand nCopy = n == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rn)) : n; + Operand mCopy = m == default ? context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rm)) : m; + + EmitSse2VectorIsNaNOpF(context, nCopy, out Operand nQNaNMask, out _, isQNaN: true); + EmitSse2VectorIsNaNOpF(context, mCopy, out Operand mQNaNMask, out _, isQNaN: true); + + int sizeF = ((IOpCodeSimd)context.CurrOp).Size & 1; + + if (sizeF == 0) + { + Operand negInfMask = scalar + ? X86GetScalar (context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity) + : X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity); + + Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask); + Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask); + + nCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, nCopy, negInfMask, nMask); + mCopy = context.AddIntrinsic(Intrinsic.X86Blendvps, mCopy, negInfMask, mMask); + + Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum); + }, scalar: scalar, nCopy, mCopy); + + if (n != default || m != default) + { + return res; + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (((OpCodeSimdReg)context.CurrOp).RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); + + return default; + } + else /* if (sizeF == 1) */ + { + Operand negInfMask = scalar + ? X86GetScalar (context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity) + : X86GetAllElements(context, isMaxNum ? 
double.NegativeInfinity : double.PositiveInfinity); + + Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask); + Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask); + + nCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, nCopy, negInfMask, nMask); + mCopy = context.AddIntrinsic(Intrinsic.X86Blendvpd, mCopy, negInfMask, mMask); + + Operand res = EmitSse41ProcessNaNsOpF(context, (op1, op2) => + { + return EmitSse2VectorMaxMinOpF(context, op1, op2, isMax: isMaxNum); + }, scalar: scalar, nCopy, mCopy); + + if (n != default || m != default) + { + return res; + } + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(((OpCodeSimdReg)context.CurrOp).Rd), res); + + return default; + } + } + + private enum AddSub + { + None, + Add, + Subtract + } + + private static void EmitSse41VectorMul_AddSub(ArmEmitterContext context, AddSub addSub) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res; + + if (op.Size == 0) + { + Operand ns8 = context.AddIntrinsic(Intrinsic.X86Psrlw, n, Const(8)); + Operand ms8 = context.AddIntrinsic(Intrinsic.X86Psrlw, m, Const(8)); + + res = context.AddIntrinsic(Intrinsic.X86Pmullw, ns8, ms8); + + res = context.AddIntrinsic(Intrinsic.X86Psllw, res, Const(8)); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); + + Operand mask = X86GetAllElements(context, 0x00FF00FF); + + res = context.AddIntrinsic(Intrinsic.X86Pblendvb, res, res2, mask); + } + else if (op.Size == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Pmullw, n, m); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Pmulld, n, m); + } + + Operand d = GetVec(op.Rd); + + if (addSub == AddSub.Add) + { + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, d, res); + } + else if (addSub == AddSub.Subtract) + { + Intrinsic subInst = X86PsubInstruction[op.Size]; + + res = context.AddIntrinsic(subInst, d, res); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + + private static void EmitSse41VectorSabdOp( + ArmEmitterContext context, + OpCodeSimdReg op, + Operand n, + Operand m, + bool isLong) + { + int size = isLong ? op.Size + 1 : op.Size; + + Intrinsic cmpgtInst = X86PcmpgtInstruction[size]; + + Operand cmpMask = context.AddIntrinsic(cmpgtInst, n, m); + + Intrinsic subInst = X86PsubInstruction[size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); + + Operand res2 = context.AddIntrinsic(subInst, m, n); + + res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + + if (!isLong && op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSse41VectorUabdOp( + ArmEmitterContext context, + OpCodeSimdReg op, + Operand n, + Operand m, + bool isLong) + { + int size = isLong ? 
op.Size + 1 : op.Size; + + Intrinsic maxInst = X86PmaxuInstruction[size]; + + Operand max = context.AddIntrinsic(maxInst, m, n); + + Intrinsic cmpeqInst = X86PcmpeqInstruction[size]; + + Operand cmpMask = context.AddIntrinsic(cmpeqInst, max, m); + + Operand onesMask = X86GetAllElements(context, -1L); + + cmpMask = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, onesMask); + + Intrinsic subInst = X86PsubInstruction[size]; + + Operand res = context.AddIntrinsic(subInst, n, m); + Operand res2 = context.AddIntrinsic(subInst, m, n); + + res = context.AddIntrinsic(Intrinsic.X86Pand, cmpMask, res); + res2 = context.AddIntrinsic(Intrinsic.X86Pandn, cmpMask, res2); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + + if (!isLong && op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static Operand EmitSse2Sll_128(ArmEmitterContext context, Operand op, int shift) + { + // The upper part of op is assumed to be zero. + Debug.Assert(shift >= 0 && shift < 64); + + if (shift == 0) + { + return op; + } + + Operand high = context.AddIntrinsic(Intrinsic.X86Pslldq, op, Const(8)); + high = context.AddIntrinsic(Intrinsic.X86Psrlq, high, Const(64 - shift)); + + Operand low = context.AddIntrinsic(Intrinsic.X86Psllq, op, Const(shift)); + + return context.AddIntrinsic(Intrinsic.X86Por, high, low); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs new file mode 100644 index 00000000..a9994e41 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdArithmetic32.cs @@ -0,0 +1,1703 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitFlowHelper; +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vabd_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorBinaryOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U); + } + + public static void Vabdl_I(ArmEmitterContext context) + { + OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp; + + EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitAbs(context, context.Subtract(op1, op2)), !op.U); + } + + public static void Vabs_S(ArmEmitterContext context) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FabsS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarUnaryOpSimd32(context, (m) => + { + return EmitFloatAbs(context, m, (op.Size & 1) == 0, false); + }); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1)); + } + } + + public static void Vabs_V(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FabsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + 
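// Float abs is emitted as a bitwise clear of each element's sign bit (what EmitFloatAbs produces), rather than as arithmetic. +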
EmitVectorUnaryOpSimd32(context, (m) => + { + return EmitFloatAbs(context, m, (op.Size & 1) == 0, true); + }); + } + else + { + EmitVectorUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Abs), op1)); + } + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitAbs(context, op1)); + } + } + + private static Operand EmitAbs(ArmEmitterContext context, Operand value) + { + Operand isPositive = context.ICompareGreaterOrEqual(value, Const(value.Type, 0)); + + return context.ConditionalSelect(isPositive, value, context.Negate(value)); + } + + public static void Vadd_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF32(context, Intrinsic.X86Addss, Intrinsic.X86Addsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, op2)); + } + } + + public static void Vadd_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FaddV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF32(context, Intrinsic.X86Addps, Intrinsic.X86Addpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Add(op1, op2)); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2)); + } + } + + public static void Vadd_I(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PaddInstruction[op.Size], op1, op2)); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.Add(op1, op2)); + } + } + + public static void Vaddl_I(ArmEmitterContext context) + { + OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp; + + EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U); + } + + public static void Vaddw_I(ArmEmitterContext context) + { + OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp; + + EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U); + } + + public static void Vcnt(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount(); + + for (int index = 0; index < elems; index++) + { + Operand de; + Operand me = EmitVectorExtractZx32(context, op.Qm, op.Im + index, op.Size); + + if (Optimizations.UsePopCnt) + { + de = context.AddIntrinsicInt(Intrinsic.X86Popcnt, me); + } + else + { + de = EmitCountSetBits8(context, me); + } + + res = EmitVectorInsert(context, res, de, op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vdup(ArmEmitterContext context) + { + OpCode32SimdDupGP op = (OpCode32SimdDupGP)context.CurrOp; + + Operand insert = GetIntA32(context, op.Rt); + + // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts. 
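+ // e.g. Size == 0: the zero-extended byte 0xAB times 0x0101010101010101 gives 0xABABABABABABABAB (no carries, since 0xAB < 0x100).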
+ insert = op.Size switch + { + 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)), + 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)), + 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)), + _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".") + }; + + InsertScalar(context, op.Vd, insert); + if (op.Q) + { + InsertScalar(context, op.Vd + 1, insert); + } + } + + public static void Vdup_1(ArmEmitterContext context) + { + OpCode32SimdDupElem op = (OpCode32SimdDupElem)context.CurrOp; + + Operand insert = EmitVectorExtractZx32(context, op.Vm >> 1, ((op.Vm & 1) << (3 - op.Size)) + op.Index, op.Size); + + // Zero extend into an I64, then replicate. Saves the most time over elementwise inserts. + insert = op.Size switch + { + 2 => context.Multiply(context.ZeroExtend32(OperandType.I64, insert), Const(0x0000000100000001u)), + 1 => context.Multiply(context.ZeroExtend16(OperandType.I64, insert), Const(0x0001000100010001u)), + 0 => context.Multiply(context.ZeroExtend8(OperandType.I64, insert), Const(0x0101010101010101u)), + _ => throw new InvalidOperationException($"Invalid Vdup size \"{op.Size}\".") + }; + + InsertScalar(context, op.Vd, insert); + if (op.Q) + { + InsertScalar(context, op.Vd | 1, insert); + } + } + + private static (long, long) MaskHelperByteSequence(int start, int length, int startByte) + { + int end = start + length; + int b = startByte; + long result = 0; + long result2 = 0; + for (int i = 0; i < 8; i++) + { + result |= (long)((i >= end || i < start) ? 0x80 : b++) << (i * 8); + } + for (int i = 8; i < 16; i++) + { + result2 |= (long)((i >= end || i < start) ? 0x80 : b++) << ((i - 8) * 8); + } + return (result2, result); + } + + public static void Vext(ArmEmitterContext context) + { + OpCode32SimdExt op = (OpCode32SimdExt)context.CurrOp; + int elems = op.GetBytesCount(); + int byteOff = op.Immediate; + + if (Optimizations.UseSsse3) + { + EmitVectorBinaryOpSimd32(context, (n, m) => + { + // Writing low to high of d: start <imm> into n, overlap into m. + // Then rotate n down by <imm>, m up by (elems)-imm. + // Then OR them together for the result. + + (long nMaskHigh, long nMaskLow) = MaskHelperByteSequence(0, elems - byteOff, byteOff); + (long mMaskHigh, long mMaskLow) = MaskHelperByteSequence(elems - byteOff, byteOff, 0); + Operand nMask, mMask; + if (!op.Q) + { + // Do the same operation to the bytes in the top doubleword too, as our target could be in either. + nMaskHigh = nMaskLow + 0x0808080808080808L; + mMaskHigh = mMaskLow + 0x0808080808080808L; + } + nMask = X86GetElements(context, nMaskHigh, nMaskLow); + mMask = X86GetElements(context, mMaskHigh, mMaskLow); + Operand nPart = context.AddIntrinsic(Intrinsic.X86Pshufb, n, nMask); + Operand mPart = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mMask); + + return context.AddIntrinsic(Intrinsic.X86Por, nPart, mPart); + }); + } + else + { + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand extract; + + if (byteOff >= elems) + { + extract = EmitVectorExtractZx32(context, op.Qm, op.Im + (byteOff - elems), op.Size); + } + else + { + extract = EmitVectorExtractZx32(context, op.Qn, op.In + byteOff, op.Size); + } + byteOff++; + + res = EmitVectorInsert(context, res, extract, op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + } + + public static void Vfma_S(ArmEmitterContext context) // Fused. 
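+ // Fused: one rounding for op1 + op2 * op3, unlike Vmla_S, which rounds the product and the accumulation separately.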
+ { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmadd231ss, Intrinsic.X86Vfmadd231sd); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulAdd), op1, op2, op3); + }); + } + } + + public static void Vfma_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseFma) + { + EmitVectorTernaryOpF32(context, Intrinsic.X86Vfmadd231ps); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3); + }); + } + } + + public static void Vfms_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmadd231ss, Intrinsic.X86Vfnmadd231sd); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMulSub), op1, op2, op3); + }); + } + } + + public static void Vfms_V(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseFma) + { + EmitVectorTernaryOpF32(context, Intrinsic.X86Vfnmadd231ps); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3); + }); + } + } + + public static void Vfnma_S(ArmEmitterContext context) // Fused. + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Vfnmsub231ss, Intrinsic.X86Vfnmsub231sd); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulAdd), op1, op2, op3); + }); + } + } + + public static void Vfnms_S(ArmEmitterContext context) // Fused. 
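+ // Fused: evaluates -(op1) + op2 * op3 with a single rounding; note the isNegD add fallback below.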
+ { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseFma) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Vfmsub231ss, Intrinsic.X86Vfmsub231sd); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPNegMulSub), op1, op2, op3); + }); + } + } + + public static void Vhadd(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ShiftRightUI(context.Add(op1, op2), Const(1))); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ShiftRightSI(context.Add(op1, op2), Const(1))); + } + } + + public static void Vmov_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarUnaryOpF32(context, 0, 0); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => op1); + } + } + + public static void Vmovn(ArmEmitterContext context) + { + EmitVectorUnaryNarrowOp32(context, (op1) => op1); + } + + public static void Vneg_S(ArmEmitterContext context) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FnegS); + } + else if (Optimizations.UseSse2) + { + EmitScalarUnaryOpSimd32(context, (m) => + { + if ((op.Size & 1) == 0) + { + Operand mask = X86GetScalar(context, -0f); + return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m); + } + else + { + Operand mask = X86GetScalar(context, -0d); + return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m); + } + }); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => context.Negate(op1)); + } + } + + public static void Vnmul_S(ArmEmitterContext context) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FnmulS); + } + else if (Optimizations.UseSse2) + { + EmitScalarBinaryOpSimd32(context, (n, m) => + { + if ((op.Size & 1) == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulss, n, m); + Operand mask = X86GetScalar(context, -0f); + return context.AddIntrinsic(Intrinsic.X86Xorps, mask, res); + } + else + { + Operand res = context.AddIntrinsic(Intrinsic.X86Mulsd, n, m); + Operand mask = X86GetScalar(context, -0d); + return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, res); + } + }); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Negate(context.Multiply(op1, op2))); + } + } + + public static void Vnmla_S(ArmEmitterContext context) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd, isNegD: true); + } + else if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Subtract(context.Negate(op1), 
context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3); + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), context.Negate(op1), res); + }); + } + } + + public static void Vnmls_S(ArmEmitterContext context) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FnmsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd, isNegD: true); + } + else if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Add(context.Negate(op1), context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3); + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), context.Negate(op1), res); + }); + } + } + + public static void Vneg_V(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FnegV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + if ((op.Size & 1) == 0) + { + Operand mask = X86GetAllElements(context, -0f); + return context.AddIntrinsic(Intrinsic.X86Xorps, mask, m); + } + else + { + Operand mask = X86GetAllElements(context, -0d); + return context.AddIntrinsic(Intrinsic.X86Xorpd, mask, m); + } + }); + } + else + { + EmitVectorUnaryOpF32(context, (op1) => context.Negate(op1)); + } + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => context.Negate(op1)); + } + } + + public static void Vdiv_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FdivS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF32(context, Intrinsic.X86Divss, Intrinsic.X86Divsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Divide(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPDiv), op1, op2); + }); + } + } + + public static void Vmaxnm_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmaxnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41MaxMinNumOpF32(context, true, true); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMaxNum), op1, op2)); + } + } + + public static void Vmaxnm_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41MaxMinNumOpF32(context, true, false); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxNumFpscr), 
op1, op2)); + } + } + + public static void Vminnm_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FminnmS); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41MaxMinNumOpF32(context, false, true); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => EmitSoftFloatCall(context, nameof(SoftFloat32.FPMinNum), op1, op2)); + } + } + + public static void Vminnm_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminnmV); + } + else if (Optimizations.FastFP && Optimizations.UseSse41) + { + EmitSse41MaxMinNumOpF32(context, false, false); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinNumFpscr), op1, op2)); + } + } + + public static void Vmax_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmaxV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF32(context, Intrinsic.X86Maxps, Intrinsic.X86Maxpd); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMaxFpscr), op1, op2); + }); + } + } + + public static void Vmax_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) + { + if (Optimizations.UseSse2) + { + EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxuInstruction[op.Size], op1, op2)); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreaterUI(op1, op2), op1, op2)); + } + } + else + { + if (Optimizations.UseSse2) + { + EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PmaxsInstruction[op.Size], op1, op2)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareGreater(op1, op2), op1, op2)); + } + } + } + + public static void Vmin_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FminV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF32(context, Intrinsic.X86Minps, Intrinsic.X86Minpd); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2); + }); + } + } + + public static void Vmin_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) + { + if (Optimizations.UseSse2) + { + EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminuInstruction[op.Size], op1, op2)); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLessUI(op1, op2), op1, op2)); + } + } + else + { + if (Optimizations.UseSse2) + { + EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PminsInstruction[op.Size], op1, op2)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.ConditionalSelect(context.ICompareLess(op1, op2), op1, op2)); + } + } + } + + public static void Vmla_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && 
Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmaddS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Addss, Intrinsic.X86Addsd); + } + else if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Add(op1, context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3); + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPAdd), op1, res); + }); + } + } + + public static void Vmla_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlaV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd); + } + else if (Optimizations.FastFP) + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3); + }); + } + } + + public static void Vmla_I(ArmEmitterContext context) + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + + public static void Vmla_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Addps, Intrinsic.X86Addpd); + } + else if (Optimizations.FastFP) + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulAddFpscr), op1, op2, op3)); + } + } + else + { + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Add(op1, context.Multiply(op2, op3)), false); + } + } + + public static void Vmlal_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorTernaryLongOpI32(context, (d, n, m) => context.Add(d, context.Multiply(n, m)), !op.U); + } + + public static void Vmls_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarTernaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd, Intrinsic.X86Subss, Intrinsic.X86Subsd); + } + else if (Optimizations.FastFP) + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + return context.Subtract(op1, context.Multiply(op2, op3)); + }); + } + else + { + EmitScalarTernaryOpF32(context, (op1, op2, op3) => + { + Operand res = EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op2, op3); + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSub), op1, res); + }); + } + } + + public static void Vmls_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpF32(context, Intrinsic.Arm64FmlsV); + } + 
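+ // Note: the Arm64 FMLS used above is a fused multiply-subtract, while A32 VMLS rounds the
+ // product before subtracting; FastFP accepts that rounding difference, as does the SSE2 path below.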
else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorTernaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd); + } + else if (Optimizations.FastFP) + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorTernaryOpF32(context, (op1, op2, op3) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3); + }); + } + } + + public static void Vmls_I(ArmEmitterContext context) + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + + public static void Vmls_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorsByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd, Intrinsic.X86Subps, Intrinsic.X86Subpd); + } + else if (Optimizations.FastFP) + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3))); + } + else + { + EmitVectorsByScalarOpF32(context, (op1, op2, op3) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulSubFpscr), op1, op2, op3)); + } + } + else + { + EmitVectorsByScalarOpI32(context, (op1, op2, op3) => context.Subtract(op1, context.Multiply(op2, op3)), false); + } + } + + public static void Vmlsl_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorTernaryLongOpI32(context, (opD, op1, op2) => context.Subtract(opD, context.Multiply(op1, op2)), !op.U); + } + + public static void Vmul_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FmulS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF32(context, Intrinsic.X86Mulss, Intrinsic.X86Mulsd); + } + else if (Optimizations.FastFP) + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPMul), op1, op2); + }); + } + } + + public static void Vmul_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FmulV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); + } + else if (Optimizations.FastFP) + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2); + }); + } + } + + public static void Vmul_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) // This instruction is always signed, U indicates polynomial mode. 
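+ // Polynomial mode multiplies in GF(2): partial products are combined with XOR instead of
+ // addition, so e.g. 0b0011 * 0b0011 = 0b0101 (x^2 + 1), not 9.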
+ { + EmitVectorBinaryOpZx32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => context.Multiply(op1, op2)); + } + } + + public static void Vmul_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorByScalarOpF32(context, Intrinsic.X86Mulps, Intrinsic.X86Mulpd); + } + else if (Optimizations.FastFP) + { + EmitVectorByScalarOpF32(context, (op1, op2) => context.Multiply(op1, op2)); + } + else + { + EmitVectorByScalarOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMulFpscr), op1, op2)); + } + } + else + { + EmitVectorByScalarOpI32(context, (op1, op2) => context.Multiply(op1, op2), false); + } + } + + public static void Vmull_1(ArmEmitterContext context) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + EmitVectorByScalarLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U); + } + + public static void Vmull_I(ArmEmitterContext context) + { + OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp; + + if (op.Polynomial) + { + if (op.Size == 0) // P8 + { + EmitVectorBinaryLongOpI32(context, (op1, op2) => EmitPolynomialMultiply(context, op1, op2, 8 << op.Size), false); + } + else /* if (op.Size == 2) // P64 */ + { + Operand ne = context.VectorExtract(OperandType.I64, GetVec(op.Qn), op.Vn & 1); + Operand me = context.VectorExtract(OperandType.I64, GetVec(op.Qm), op.Vm & 1); + + Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128)), ne, me); + + context.Copy(GetVecA32(op.Qd), res); + } + } + else + { + EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Multiply(op1, op2), !op.U); + } + } + + public static void Vpadd_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FaddpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Addps); + } + else + { + EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPAddFpscr), op1, op2)); + } + } + + public static void Vpadd_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp32(context, X86PaddInstruction); + } + else + { + EmitVectorPairwiseOpI32(context, (op1, op2) => context.Add(op1, op2), !op.U); + } + } + + public static void Vpaddl(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + EmitVectorPairwiseLongOpI32(context, (op1, op2) => context.Add(op1, op2), (op.Opc & 1) == 0); + } + + public static void Vpmax_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FmaxpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Maxps); + } + else + { + EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat64.FPMaxFpscr), op1, op2)); + } + } + + public static void Vpmax_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + 
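+ // Pairwise max: each result element is the max of an adjacent element pair, taken first from
+ // n and then from m; the SSSE3 helper is assumed to de-interleave the pairs before applying
+ // pmaxu/pmaxs.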
EmitSsse3VectorPairwiseOp32(context, op.U ? X86PmaxuInstruction : X86PmaxsInstruction); + } + else + { + EmitVectorPairwiseOpI32(context, (op1, op2) => + { + Operand greater = op.U ? context.ICompareGreaterUI(op1, op2) : context.ICompareGreater(op1, op2); + return context.ConditionalSelect(greater, op1, op2); + }, !op.U); + } + } + + public static void Vpmin_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorPairwiseOpF32(context, Intrinsic.Arm64FminpV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2VectorPairwiseOpF32(context, Intrinsic.X86Minps); + } + else + { + EmitVectorPairwiseOpF32(context, (op1, op2) => EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPMinFpscr), op1, op2)); + } + } + + public static void Vpmin_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + EmitSsse3VectorPairwiseOp32(context, op.U ? X86PminuInstruction : X86PminsInstruction); + } + else + { + EmitVectorPairwiseOpI32(context, (op1, op2) => + { + Operand greater = op.U ? context.ICompareLessUI(op1, op2) : context.ICompareLess(op1, op2); + return context.ConditionalSelect(greater, op1, op2); + }, !op.U); + } + } + + public static void Vqadd(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitSaturatingAddSubBinaryOp(context, add: true, !op.U); + } + + public static void Vqdmulh(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + int eSize = 8 << op.Size; + + EmitVectorBinaryOpI32(context, (op1, op2) => + { + if (op.Size == 2) + { + op1 = context.SignExtend32(OperandType.I64, op1); + op2 = context.SignExtend32(OperandType.I64, op2); + } + + Operand res = context.Multiply(op1, op2); + res = context.ShiftRightSI(res, Const(eSize - 1)); + res = EmitSatQ(context, res, eSize, signedSrc: true, signedDst: true); + + if (op.Size == 2) + { + res = context.ConvertI64ToI32(res); + } + + return res; + }, signed: true); + } + + public static void Vqmovn(ArmEmitterContext context) + { + OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp; + + bool signed = !op.Q; + + EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signed, signed), signed); + } + + public static void Vqmovun(ArmEmitterContext context) + { + OpCode32SimdMovn op = (OpCode32SimdMovn)context.CurrOp; + + EmitVectorUnaryNarrowOp32(context, (op1) => EmitSatQ(context, op1, 8 << op.Size, signedSrc: true, signedDst: false), signed: true); + } + + public static void Vqsub(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitSaturatingAddSubBinaryOp(context, add: false, !op.U); + } + + public static void Vrev(ArmEmitterContext context) + { + OpCode32SimdRev op = (OpCode32SimdRev)context.CurrOp; + + if (Optimizations.UseSsse3) + { + EmitVectorUnaryOpSimd32(context, (op1) => + { + Operand mask; + switch (op.Size) + { + case 3: + // Rev64 + switch (op.Opc) + { + case 0: + mask = X86GetElements(context, 0x08090a0b0c0d0e0fL, 0x0001020304050607L); + return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask); + case 1: + mask = X86GetElements(context, 0x09080b0a0d0c0f0eL, 0x0100030205040706L); + return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask); + case 2: + return context.AddIntrinsic(Intrinsic.X86Shufps, op1, op1, Const(1 | (0 << 2) | (3 << 4) | (2 << 6))); + } + break; + case 2: + // Rev32 + switch (op.Opc) + { + case 0: 
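+ // Byte-granular Rev32: the pshufb mask below reverses the byte order within each
+ // 32-bit word of the vector.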
+ mask = X86GetElements(context, 0x0c0d0e0f_08090a0bL, 0x04050607_00010203L); + return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask); + case 1: + mask = X86GetElements(context, 0x0d0c0f0e_09080b0aL, 0x05040706_01000302L); + return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask); + } + break; + case 1: + // Rev16 + mask = X86GetElements(context, 0x0e0f_0c0d_0a0b_0809L, 0x_0607_0405_0203_0001L); + return context.AddIntrinsic(Intrinsic.X86Pshufb, op1, mask); + } + + throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable. + }); + } + else + { + EmitVectorUnaryOpZx32(context, (op1) => + { + switch (op.Opc) + { + case 0: + switch (op.Size) // Swap bytes. + { + case 1: + return InstEmitAluHelper.EmitReverseBytes16_32Op(context, op1); + case 2: + case 3: + return context.ByteSwap(op1); + } + break; + case 1: + switch (op.Size) + { + case 2: + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff0000)), Const(16)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x0000ffff)), Const(16))); + case 3: + return context.BitwiseOr( + context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffff000000000000ul)), Const(48)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x000000000000fffful)), Const(48))), + context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0x0000ffff00000000ul)), Const(16)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000ffff0000ul)), Const(16)))); + } + break; + case 2: + // Swap upper and lower halves. + return context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op1, Const(0xffffffff00000000ul)), Const(32)), + context.ShiftLeft(context.BitwiseAnd(op1, Const(0x00000000fffffffful)), Const(32))); + } + + throw new InvalidOperationException("Invalid VREV Opcode + Size combo."); // Should be unreachable. 
+ }); + } + } + + public static void Vrecpe(ArmEmitterContext context) + { + OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp; + + if (op.F) + { + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrecpeV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) + { + EmitVectorUnaryOpF32(context, Intrinsic.X86Rcpps, 0); + } + else + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRecipEstimateFpscr), op1); + }); + } + } + else + { + throw new NotImplementedException("Integer Vrecpe not currently implemented."); + } + } + + public static void Vrecps(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrecpsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + bool single = (op.Size & 1) == 0; + + // (2 - (n*m)) + EmitVectorBinaryOpSimd32(context, (n, m) => + { + if (single) + { + Operand maskTwo = X86GetAllElements(context, 2f); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); + + return context.AddIntrinsic(Intrinsic.X86Subps, maskTwo, res); + } + else + { + Operand maskTwo = X86GetAllElements(context, 2d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); + + return context.AddIntrinsic(Intrinsic.X86Subpd, maskTwo, res); + } + }); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRecipStep), op1, op2); + }); + } + } + + public static void Vrhadd(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorBinaryOpI32(context, (op1, op2) => + { + if (op.Size == 2) + { + op1 = context.ZeroExtend32(OperandType.I64, op1); + op2 = context.ZeroExtend32(OperandType.I64, op2); + } + + Operand res = context.Add(context.Add(op1, op2), Const(op1.Type, 1L)); + res = context.ShiftRightUI(res, Const(1)); + + if (op.Size == 2) + { + res = context.ConvertI64ToI32(res); + } + + return res; + }, !op.U); + } + + public static void Vrsqrte(ArmEmitterContext context) + { + OpCode32SimdSqrte op = (OpCode32SimdSqrte)context.CurrOp; + + if (op.F) + { + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrsqrteV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2 && sizeF == 0) + { + EmitVectorUnaryOpF32(context, Intrinsic.X86Rsqrtps, 0); + } + else + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCallDefaultFpscr(context, nameof(SoftFloat32.FPRSqrtEstimateFpscr), op1); + }); + } + } + else + { + throw new NotImplementedException("Integer Vrsqrte not currently implemented."); + } + } + + public static void Vrsqrts(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FrsqrtsV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + bool single = (op.Size & 1) == 0; + + // (3 - (n*m)) / 2 + EmitVectorBinaryOpSimd32(context, (n, m) => + { + if (single) + { + Operand maskHalf = X86GetAllElements(context, 0.5f); + Operand maskThree = X86GetAllElements(context, 3f); + + 
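+ // Computes 0.5f * (3 - n*m); note this fast path omits the Arm FPRSqrtStep special case
+ // that returns 1.5 when one operand is zero and the other infinite (acceptable under FastFP).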
Operand res = context.AddIntrinsic(Intrinsic.X86Mulps, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Subps, maskThree, res); + return context.AddIntrinsic(Intrinsic.X86Mulps, maskHalf, res); + } + else + { + Operand maskHalf = X86GetAllElements(context, 0.5d); + Operand maskThree = X86GetAllElements(context, 3d); + + Operand res = context.AddIntrinsic(Intrinsic.X86Mulpd, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Subpd, maskThree, res); + return context.AddIntrinsic(Intrinsic.X86Mulpd, maskHalf, res); + } + }); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPRSqrtStep), op1, op2); + }); + } + } + + public static void Vsel(ArmEmitterContext context) + { + OpCode32SimdSel op = (OpCode32SimdSel)context.CurrOp; + + Operand condition = default; + + switch (op.Cc) + { + case OpCode32SimdSelMode.Eq: + condition = GetCondTrue(context, Condition.Eq); + break; + case OpCode32SimdSelMode.Ge: + condition = GetCondTrue(context, Condition.Ge); + break; + case OpCode32SimdSelMode.Gt: + condition = GetCondTrue(context, Condition.Gt); + break; + case OpCode32SimdSelMode.Vs: + condition = GetCondTrue(context, Condition.Vs); + break; + } + + EmitScalarBinaryOpI32(context, (op1, op2) => + { + return context.ConditionalSelect(condition, op1, op2); + }); + } + + public static void Vsqrt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FsqrtS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarUnaryOpF32(context, Intrinsic.X86Sqrtss, Intrinsic.X86Sqrtsd); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => + { + return EmitSoftFloatCall(context, nameof(SoftFloat32.FPSqrt), op1); + }); + } + } + + public static void Vsub_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarBinaryOpF32(context, Intrinsic.Arm64FsubS); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitScalarBinaryOpF32(context, Intrinsic.X86Subss, Intrinsic.X86Subsd); + } + else + { + EmitScalarBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Vsub_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpF32(context, Intrinsic.Arm64FsubV); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitVectorBinaryOpF32(context, Intrinsic.X86Subps, Intrinsic.X86Subpd); + } + else + { + EmitVectorBinaryOpF32(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Vsub_I(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + EmitVectorBinaryOpSimd32(context, (op1, op2) => context.AddIntrinsic(X86PsubInstruction[op.Size], op1, op2)); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.Subtract(op1, op2)); + } + } + + public static void Vsubl_I(ArmEmitterContext context) + { + OpCode32SimdRegLong op = (OpCode32SimdRegLong)context.CurrOp; + + EmitVectorBinaryLongOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U); + } + + public static void Vsubw_I(ArmEmitterContext context) + { + OpCode32SimdRegWide op = (OpCode32SimdRegWide)context.CurrOp; + + EmitVectorBinaryWideOpI32(context, (op1, op2) => context.Subtract(op1, op2), !op.U); + } + + private static void 
EmitSaturatingAddSubBinaryOp(ArmEmitterContext context, bool add, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + EmitVectorBinaryOpI32(context, (ne, me) => + { + if (op.Size <= 2) + { + if (op.Size == 2) + { + ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne); + me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me); + } + + Operand res = add ? context.Add(ne, me) : context.Subtract(ne, me); + + res = EmitSatQ(context, res, 8 << op.Size, signedSrc: true, signed); + + if (op.Size == 2) + { + res = context.ConvertI64ToI32(res); + } + + return res; + } + else if (add) /* if (op.Size == 3) */ + { + return signed + ? EmitBinarySignedSatQAdd(context, ne, me) + : EmitBinaryUnsignedSatQAdd(context, ne, me); + } + else /* if (sub) */ + { + return signed + ? EmitBinarySignedSatQSub(context, ne, me) + : EmitBinaryUnsignedSatQSub(context, ne, me); + } + }, signed); + } + + private static void EmitSse41MaxMinNumOpF32(ArmEmitterContext context, bool isMaxNum, bool scalar) + { + IOpCode32Simd op = (IOpCode32Simd)context.CurrOp; + + Func<Operand, Operand, Operand> genericEmit = (n, m) => + { + Operand nNum = context.Copy(n); + Operand mNum = context.Copy(m); + + InstEmit.EmitSse2VectorIsNaNOpF(context, nNum, out Operand nQNaNMask, out _, isQNaN: true); + InstEmit.EmitSse2VectorIsNaNOpF(context, mNum, out Operand mQNaNMask, out _, isQNaN: true); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand negInfMask = X86GetAllElements(context, isMaxNum ? float.NegativeInfinity : float.PositiveInfinity); + + Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnps, mQNaNMask, nQNaNMask); + Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnps, nQNaNMask, mQNaNMask); + + nNum = context.AddIntrinsic(Intrinsic.X86Blendvps, nNum, negInfMask, nMask); + mNum = context.AddIntrinsic(Intrinsic.X86Blendvps, mNum, negInfMask, mMask); + + return context.AddIntrinsic(isMaxNum ? Intrinsic.X86Maxps : Intrinsic.X86Minps, nNum, mNum); + } + else /* if (sizeF == 1) */ + { + Operand negInfMask = X86GetAllElements(context, isMaxNum ? double.NegativeInfinity : double.PositiveInfinity); + + Operand nMask = context.AddIntrinsic(Intrinsic.X86Andnpd, mQNaNMask, nQNaNMask); + Operand mMask = context.AddIntrinsic(Intrinsic.X86Andnpd, nQNaNMask, mQNaNMask); + + nNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, nNum, negInfMask, nMask); + mNum = context.AddIntrinsic(Intrinsic.X86Blendvpd, mNum, negInfMask, mMask); + + return context.AddIntrinsic(isMaxNum ? 
Intrinsic.X86Maxpd : Intrinsic.X86Minpd, nNum, mNum); + } + }; + + if (scalar) + { + EmitScalarBinaryOpSimd32(context, genericEmit); + } + else + { + EmitVectorBinaryOpSimd32(context, genericEmit); + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs b/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs new file mode 100644 index 00000000..c32b64ba --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdCmp.cs @@ -0,0 +1,799 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { + public static void Cmeq_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: true); + } + + public static void Cmeq_V(ArmEmitterContext context) + { + if (Optimizations.UseSse41) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareEqual(op1, op2), scalar: false); + } + } + + public static void Cmge_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: true); + } + + public static void Cmge_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, m, n); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqual(op1, op2), scalar: false); + } + } + + public static void Cmgt_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: true); + } + + public static void Cmgt_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m; + + if (op is OpCodeSimdReg binOp) + { + m = GetVec(binOp.Rm); + } + else + { + m = context.VectorZero(); + } + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreater(op1, op2), scalar: false); + } + } + + public static void Cmhi_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => 
context.ICompareGreaterUI(op1, op2), scalar: true); + } + + public static void Cmhi_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, m, n); + + Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + res = context.AddIntrinsic(cmpInst, res, m); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterUI(op1, op2), scalar: false); + } + } + + public static void Cmhs_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: true); + } + + public static void Cmhs_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse41 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic maxInst = X86PmaxuInstruction[op.Size]; + + Operand res = context.AddIntrinsic(maxInst, n, m); + + Intrinsic cmpInst = X86PcmpeqInstruction[op.Size]; + + res = context.AddIntrinsic(cmpInst, res, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareGreaterOrEqualUI(op1, op2), scalar: false); + } + } + + public static void Cmle_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: true); + } + + public static void Cmle_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, n, context.VectorZero()); + + Operand mask = X86GetAllElements(context, -1L); + + res = context.AddIntrinsic(Intrinsic.X86Pandn, res, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareLessOrEqual(op1, op2), scalar: false); + } + } + + public static void Cmlt_S(ArmEmitterContext context) + { + EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: true); + } + + public static void Cmlt_V(ArmEmitterContext context) + { + if (Optimizations.UseSse42) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic cmpInst = X86PcmpgtInstruction[op.Size]; + + Operand res = context.AddIntrinsic(cmpInst, context.VectorZero(), n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitCmpOp(context, (op1, op2) => context.ICompareLess(op1, op2), scalar: false); + } + } + + public static void Cmtst_S(ArmEmitterContext context) + { + EmitCmtstOp(context, scalar: true); + } + + public static void Cmtst_V(ArmEmitterContext context) + { + EmitCmtstOp(context, scalar: false); + } + + public static void Facge_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAvx) + { + 
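+ // FACGE compares magnitudes, |n| >= |m|; absolute: true makes the SSE/AVX helper clear
+ // the sign bit of both operands before the compare.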
EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true, absolute: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true, absolute: true); + } + } + + public static void Facge_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false, absolute: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false, absolute: true); + } + } + + public static void Facgt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: true, absolute: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true, absolute: true); + } + } + + public static void Facgt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: false, absolute: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false, absolute: true); + } + } + + public static void Fccmp_S(ArmEmitterContext context) + { + EmitFccmpOrFccmpe(context, signalNaNs: false); + } + + public static void Fccmpe_S(ArmEmitterContext context) + { + EmitFccmpOrFccmpe(context, signalNaNs: true); + } + + public static void Fcmeq_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.Equal, scalar: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: true); + } + } + + public static void Fcmeq_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.Equal, scalar: false); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareEQ), scalar: false); + } + } + + public static void Fcmge_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: true); + } + } + + public static void Fcmge_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThanOrEqual, scalar: false); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGE), scalar: false); + } + } + + public static void Fcmgt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: true); + } + } + + public static void Fcmgt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.GreaterThan, scalar: false); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareGT), scalar: false); + } + } + + public static void Fcmle_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThanOrEqual, scalar: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: true); + } + } + + public static void Fcmle_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF(context, 
CmpCondition.LessThanOrEqual, scalar: false); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLE), scalar: false); + } + } + + public static void Fcmlt_S(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThan, scalar: true); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: true); + } + } + + public static void Fcmlt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF(context, CmpCondition.LessThan, scalar: false); + } + else + { + EmitCmpOpF(context, nameof(SoftFloat32.FPCompareLT), scalar: false); + } + } + + public static void Fcmp_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: false); + } + else + { + EmitFcmpOrFcmpe(context, signalNaNs: false); + } + } + + public static void Fcmpe_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitFcmpOrFcmpe(context, signalNaNs: true); + } + else + { + EmitFcmpOrFcmpe(context, signalNaNs: true); + } + } + + private static void EmitFccmpOrFccmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblTrue, InstEmitFlowHelper.GetCondTrue(context, op.Cond)); + + EmitSetNzcv(context, op.Nzcv); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + EmitFcmpOrFcmpe(context, signalNaNs); + + context.MarkLabel(lblEnd); + } + + private static void EmitSetNzcv(ArmEmitterContext context, int nzcv) + { + Operand Extract(int value, int bit) + { + if (bit != 0) + { + value >>= bit; + } + + value &= 1; + + return Const(value); + } + + SetFlag(context, PState.VFlag, Extract(nzcv, 0)); + SetFlag(context, PState.CFlag, Extract(nzcv, 1)); + SetFlag(context, PState.ZFlag, Extract(nzcv, 2)); + SetFlag(context, PState.NFlag, Extract(nzcv, 3)); + } + + private static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false; + + if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2)) + { + Operand n = GetVec(op.Rn); + Operand m = cmpWithZero ? context.VectorZero() : GetVec(op.Rm); + + CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ; + + Operand lblNaN = Label(); + Operand lblEnd = Label(); + + if (op.Size == 0) + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered)); + + Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand nCopy = context.Copy(n); + Operand mCopy = cmpWithZero ? 
context.VectorZero() : context.Copy(m); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, nCopy, mCopy); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, nCopy, mCopy); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, nCopy, mCopy); + + SetFlag(context, PState.VFlag, Const(0)); + SetFlag(context, PState.CFlag, cf); + SetFlag(context, PState.ZFlag, zf); + SetFlag(context, PState.NFlag, nf); + } + else /* if (op.Size == 1) */ + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered)); + + Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand nCopy = context.Copy(n); + Operand mCopy = cmpWithZero ? context.VectorZero() : context.Copy(m); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, nCopy, mCopy); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, nCopy, mCopy); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, nCopy, mCopy); + + SetFlag(context, PState.VFlag, Const(0)); + SetFlag(context, PState.CFlag, cf); + SetFlag(context, PState.ZFlag, zf); + SetFlag(context, PState.NFlag, nf); + } + + context.Branch(lblEnd); + + context.MarkLabel(lblNaN); + + SetFlag(context, PState.VFlag, Const(1)); + SetFlag(context, PState.CFlag, Const(1)); + SetFlag(context, PState.ZFlag, Const(0)); + SetFlag(context, PState.NFlag, Const(0)); + + context.MarkLabel(lblEnd); + } + else + { + OperandType type = op.Size != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand me; + + if (cmpWithZero) + { + me = op.Size == 0 ? ConstF(0f) : ConstF(0d); + } + else + { + me = context.VectorExtract(type, GetVec(op.Rm), 0); + } + + Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs)); + + EmitSetNzcv(context, nzcv); + } + } + + private static void EmitSetNzcv(ArmEmitterContext context, Operand nzcv) + { + Operand Extract(Operand value, int bit) + { + if (bit != 0) + { + value = context.ShiftRightUI(value, Const(bit)); + } + + value = context.BitwiseAnd(value, Const(1)); + + return value; + } + + SetFlag(context, PState.VFlag, Extract(nzcv, 0)); + SetFlag(context, PState.CFlag, Extract(nzcv, 1)); + SetFlag(context, PState.ZFlag, Extract(nzcv, 2)); + SetFlag(context, PState.NFlag, Extract(nzcv, 3)); + } + + private static void EmitCmpOp(ArmEmitterContext context, Func2I emitCmp, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me; + + if (op is OpCodeSimdReg binOp) + { + me = EmitVectorExtractSx(context, binOp.Rm, index, op.Size); + } + else + { + me = Const(0L); + } + + Operand isTrue = emitCmp(ne, me); + + Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L)); + + res = EmitVectorInsert(context, res, mask, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitCmtstOp(ArmEmitterContext context, bool scalar) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? 
op.GetBytesCount() >> op.Size : 1; + + ulong szMask = ulong.MaxValue >> (64 - (8 << op.Size)); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + Operand test = context.BitwiseAnd(ne, me); + + Operand isTrue = context.ICompareNotEqual(test, Const(0L)); + + Operand mask = context.ConditionalSelect(isTrue, Const(szMask), Const(0L)); + + res = EmitVectorInsert(context, res, mask, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitCmpOpF(ArmEmitterContext context, string name, bool scalar, bool absolute = false) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = !scalar ? op.GetBytesCount() >> sizeF + 2 : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me; + + if (op is OpCodeSimdReg binOp) + { + me = context.VectorExtract(type, GetVec(binOp.Rm), index); + } + else + { + me = sizeF == 0 ? ConstF(0f) : ConstF(0d); + } + + if (absolute) + { + ne = EmitUnaryMathCall(context, nameof(Math.Abs), ne); + me = EmitUnaryMathCall(context, nameof(Math.Abs), me); + } + + Operand e = EmitSoftFloatCall(context, name, ne, me); + + res = context.VectorInsert(res, e, index); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSse2OrAvxCmpOpF(ArmEmitterContext context, CmpCondition cond, bool scalar, bool absolute = false) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = op is OpCodeSimdReg binOp ? GetVec(binOp.Rm) : context.VectorZero(); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + if (absolute) + { + Operand mask = scalar ? X86GetScalar(context, int.MaxValue) : X86GetAllElements(context, int.MaxValue); + + n = context.AddIntrinsic(Intrinsic.X86Andps, n, mask); + m = context.AddIntrinsic(Intrinsic.X86Andps, m, mask); + } + + Intrinsic inst = scalar ? Intrinsic.X86Cmpss : Intrinsic.X86Cmpps; + + Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond)); + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + if (absolute) + { + Operand mask = scalar ? X86GetScalar(context, long.MaxValue) : X86GetAllElements(context, long.MaxValue); + + n = context.AddIntrinsic(Intrinsic.X86Andpd, n, mask); + m = context.AddIntrinsic(Intrinsic.X86Andpd, m, mask); + } + + Intrinsic inst = scalar ? 
Intrinsic.X86Cmpsd : Intrinsic.X86Cmppd; + + Operand res = context.AddIntrinsic(inst, n, m, Const((int)cond)); + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs new file mode 100644 index 00000000..a990e057 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdCmp32.cs @@ -0,0 +1,437 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit32 + { + public static void Vceq_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, false); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, false); + } + else + { + EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), false); + } + } + + public static void Vceq_I(ArmEmitterContext context) + { + EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, false, false); + } + + public static void Vceq_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.Equal, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF32(context, CmpCondition.Equal, true); + } + else + { + EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareEQFpscr), true); + } + } + else + { + EmitCmpOpI32(context, context.ICompareEqual, context.ICompareEqual, true, false); + } + } + + public static void Vcge_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, false); + } + else + { + EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), false); + } + } + + public static void Vcge_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, false, !op.U); + } + + public static void Vcge_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThanOrEqual, true); + } + else + { + EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGEFpscr), true); + } + } + else + { + EmitCmpOpI32(context, context.ICompareGreaterOrEqual, context.ICompareGreaterOrEqualUI, true, true); + } + } + + public static void Vcgt_V(ArmEmitterContext context) + { + if (Optimizations.FastFP && 
Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, false); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, false); + } + else + { + EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), false); + } + } + + public static void Vcgt_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, false, !op.U); + } + + public static void Vcgt_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.GreaterThan, true); + } + else if (Optimizations.FastFP && Optimizations.UseAvx) + { + EmitSse2OrAvxCmpOpF32(context, CmpCondition.GreaterThan, true); + } + else + { + EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareGTFpscr), true); + } + } + else + { + EmitCmpOpI32(context, context.ICompareGreater, context.ICompareGreaterUI, true, true); + } + } + + public static void Vcle_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThanOrEqual, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThanOrEqual, true); + } + else + { + EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLEFpscr), true); + } + } + else + { + EmitCmpOpI32(context, context.ICompareLessOrEqual, context.ICompareLessOrEqualUI, true, true); + } + } + + public static void Vclt_Z(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + if (op.F) + { + if (Optimizations.FastFP && Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitCmpOpF32(context, CmpCondition.LessThan, true); + } + else if (Optimizations.FastFP && Optimizations.UseSse2) + { + EmitSse2OrAvxCmpOpF32(context, CmpCondition.LessThan, true); + } + else + { + EmitCmpOpF32(context, nameof(SoftFloat32.FPCompareLTFpscr), true); + } + } + else + { + EmitCmpOpI32(context, context.ICompareLess, context.ICompareLessUI, true, true); + } + } + + private static void EmitCmpOpF32(ArmEmitterContext context, string name, bool zero) + { + if (zero) + { + EmitVectorUnaryOpF32(context, (m) => + { + Operand zeroOp = m.Type == OperandType.FP64 ? ConstF(0.0d) : ConstF(0.0f); + + return EmitSoftFloatCallDefaultFpscr(context, name, m, zeroOp); + }); + } + else + { + EmitVectorBinaryOpF32(context, (n, m) => + { + return EmitSoftFloatCallDefaultFpscr(context, name, n, m); + }); + } + } + + private static Operand ZerosOrOnes(ArmEmitterContext context, Operand fromBool, OperandType baseType) + { + var ones = (baseType == OperandType.I64) ? Const(-1L) : Const(-1); + + return context.ConditionalSelect(fromBool, ones, Const(baseType, 0L)); + } + + private static void EmitCmpOpI32( + ArmEmitterContext context, + Func2I signedOp, + Func2I unsignedOp, + bool zero, + bool signed) + { + if (zero) + { + if (signed) + { + EmitVectorUnaryOpSx32(context, (m) => + { + OperandType type = m.Type; + Operand zeroV = (type == OperandType.I64) ? 
Const(0L) : Const(0); + + return ZerosOrOnes(context, signedOp(m, zeroV), type); + }); + } + else + { + EmitVectorUnaryOpZx32(context, (m) => + { + OperandType type = m.Type; + Operand zeroV = (type == OperandType.I64) ? Const(0L) : Const(0); + + return ZerosOrOnes(context, unsignedOp(m, zeroV), type); + }); + } + } + else + { + if (signed) + { + EmitVectorBinaryOpSx32(context, (n, m) => ZerosOrOnes(context, signedOp(n, m), n.Type)); + } + else + { + EmitVectorBinaryOpZx32(context, (n, m) => ZerosOrOnes(context, unsignedOp(n, m), n.Type)); + } + } + } + + public static void Vcmp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, false); + } + else + { + EmitVcmpOrVcmpe(context, false); + } + } + + public static void Vcmpe(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVcmpOrVcmpe(context, true); + } + else + { + EmitVcmpOrVcmpe(context, true); + } + } + + private static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool cmpWithZero = (op.Opc & 2) != 0; + int sizeF = op.Size & 1; + + if (Optimizations.FastFP && (signalNaNs ? Optimizations.UseAvx : Optimizations.UseSse2)) + { + CmpCondition cmpOrdered = signalNaNs ? CmpCondition.OrderedS : CmpCondition.OrderedQ; + + bool doubleSize = sizeF != 0; + int shift = doubleSize ? 1 : 2; + Operand m = GetVecA32(op.Vm >> shift); + Operand n = GetVecA32(op.Vd >> shift); + + n = EmitSwapScalar(context, n, op.Vd, doubleSize); + m = cmpWithZero ? context.VectorZero() : EmitSwapScalar(context, m, op.Vm, doubleSize); + + Operand lblNaN = Label(); + Operand lblEnd = Label(); + + if (!doubleSize) + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpss, n, m, Const((int)cmpOrdered)); + + Operand isOrdered = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, ordMask); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comissge, n, m); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisseq, n, m); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisslt, n, m); + + SetFpFlag(context, FPState.VFlag, Const(0)); + SetFpFlag(context, FPState.CFlag, cf); + SetFpFlag(context, FPState.ZFlag, zf); + SetFpFlag(context, FPState.NFlag, nf); + } + else + { + Operand ordMask = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, m, Const((int)cmpOrdered)); + + Operand isOrdered = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, ordMask); + + context.BranchIfFalse(lblNaN, isOrdered); + + Operand cf = context.AddIntrinsicInt(Intrinsic.X86Comisdge, n, m); + Operand zf = context.AddIntrinsicInt(Intrinsic.X86Comisdeq, n, m); + Operand nf = context.AddIntrinsicInt(Intrinsic.X86Comisdlt, n, m); + + SetFpFlag(context, FPState.VFlag, Const(0)); + SetFpFlag(context, FPState.CFlag, cf); + SetFpFlag(context, FPState.ZFlag, zf); + SetFpFlag(context, FPState.NFlag, nf); + } + + context.Branch(lblEnd); + + context.MarkLabel(lblNaN); + + SetFpFlag(context, FPState.VFlag, Const(1)); + SetFpFlag(context, FPState.CFlag, Const(1)); + SetFpFlag(context, FPState.ZFlag, Const(0)); + SetFpFlag(context, FPState.NFlag, Const(0)); + + context.MarkLabel(lblEnd); + } + else + { + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand ne = ExtractScalar(context, type, op.Vd); + Operand me; + + if (cmpWithZero) + { + me = sizeF == 0 ? 
ConstF(0f) : ConstF(0d); + } + else + { + me = ExtractScalar(context, type, op.Vm); + } + + Operand nzcv = EmitSoftFloatCall(context, nameof(SoftFloat32.FPCompare), ne, me, Const(signalNaNs)); + + EmitSetFpscrNzcv(context, nzcv); + } + } + + private static void EmitSetFpscrNzcv(ArmEmitterContext context, Operand nzcv) + { + Operand Extract(Operand value, int bit) + { + if (bit != 0) + { + value = context.ShiftRightUI(value, Const(bit)); + } + + value = context.BitwiseAnd(value, Const(1)); + + return value; + } + + SetFpFlag(context, FPState.VFlag, Extract(nzcv, 0)); + SetFpFlag(context, FPState.CFlag, Extract(nzcv, 1)); + SetFpFlag(context, FPState.ZFlag, Extract(nzcv, 2)); + SetFpFlag(context, FPState.NFlag, Extract(nzcv, 3)); + } + + private static void EmitSse2OrAvxCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int sizeF = op.Size & 1; + Intrinsic inst = (sizeF == 0) ? Intrinsic.X86Cmpps : Intrinsic.X86Cmppd; + + if (zero) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(inst, m, context.VectorZero(), Const((int)cond)); + }); + } + else + { + EmitVectorBinaryOpSimd32(context, (n, m) => + { + return context.AddIntrinsic(inst, n, m, Const((int)cond)); + }); + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs new file mode 100644 index 00000000..db24e029 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto.cs @@ -0,0 +1,99 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Aesd_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res; + + if (Optimizations.UseAesni) + { + res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); + } + else + { + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), d, n); + } + + context.Copy(d, res); + } + + public static void Aese_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res; + + if (Optimizations.UseAesni) + { + res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); + } + else + { + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)), d, n); + } + + context.Copy(d, res); + } + + public static void Aesimc_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res; + + if (Optimizations.UseAesni) + { + res = context.AddIntrinsic(Intrinsic.X86Aesimc, n); + } + else + { + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), n); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void Aesmc_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res; + + if (Optimizations.UseAesni) + { + Operand roundKey = context.VectorZero(); + + // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens + res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey); + + // Shift 
Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens + res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey); + } + else + { + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)), n); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs new file mode 100644 index 00000000..f713a388 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdCrypto32.cs @@ -0,0 +1,99 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + partial class InstEmit32 + { + public static void Aesd_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand d = GetVecA32(op.Qd); + Operand n = GetVecA32(op.Qm); + + Operand res; + + if (Optimizations.UseAesni) + { + res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); + } + else + { + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt)), d, n); + } + + context.Copy(d, res); + } + + public static void Aese_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand d = GetVecA32(op.Qd); + Operand n = GetVecA32(op.Qm); + + Operand res; + + if (Optimizations.UseAesni) + { + res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero()); + } + else + { + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt)), d, n); + } + + context.Copy(d, res); + } + + public static void Aesimc_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand n = GetVecA32(op.Qm); + + Operand res; + + if (Optimizations.UseAesni) + { + res = context.AddIntrinsic(Intrinsic.X86Aesimc, n); + } + else + { + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns)), n); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Aesmc_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand n = GetVecA32(op.Qm); + + Operand res; + + if (Optimizations.UseAesni) + { + Operand roundKey = context.VectorZero(); + + // Inverse Shift Rows, Inverse Sub Bytes, xor 0 so nothing happens. + res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, n, roundKey); + + // Shift Rows, Sub Bytes, Mix Columns (!), xor 0 so nothing happens. 
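+ // (Editorial sketch, reasoning from the AES-NI definitions: with a zero
+ // round key, AESDECLAST performs only InvShiftRows and InvSubBytes (it has
+ // no InvMixColumns step), while AESENC performs ShiftRows, SubBytes and
+ // MixColumns. Composed, the substitutions and row shifts cancel their
+ // inverses, leaving exactly MixColumns(n) - the same result as the
+ // SoftFallback.MixColumns call on the slow path.)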
+ res = context.AddIntrinsic(Intrinsic.X86Aesenc, res, roundKey); + } + else + { + res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns)), n); + } + + context.Copy(GetVecA32(op.Qd), res); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs new file mode 100644 index 00000000..652ad397 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt.cs @@ -0,0 +1,1891 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + + static partial class InstEmit + { + public static void Fcvt_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (op.Size == 0 && op.Opc == 1) // Single -> Double. + { + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); + + Operand res = context.ConvertToFP(OperandType.FP64, ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + else if (op.Size == 1 && op.Opc == 0) // Double -> Single. + { + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0); + + Operand res = context.ConvertToFP(OperandType.FP32, ne); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + else if (op.Size == 0 && op.Opc == 3) // Single -> Half. + { + if (Optimizations.UseF16c) + { + Debug.Assert(!Optimizations.ForceLegacySse); + + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest))); + res = context.AddIntrinsic(Intrinsic.X86Pslldq, res, Const(14)); // VectorZeroUpper112() + res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(14)); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); + + context.StoreToContext(); + Operand res = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne); + context.LoadFromContext(); + + res = context.ZeroExtend16(OperandType.I64, res); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1)); + } + } + else if (op.Size == 3 && op.Opc == 0) // Half -> Single. 
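+ // (Editorial note on the F16c path below: X86Vcvtph2ps widens the four low
+ // half-precision lanes to single precision, but only lane 0 carries the
+ // scalar operand, so VectorZeroUpper96 then clears the unused upper bits of
+ // the destination.)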
+ { + if (Optimizations.UseF16c) + { + Debug.Assert(!Optimizations.ForceLegacySse); + + Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn)); + res = context.VectorZeroUpper96(res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1); + + context.StoreToContext(); + Operand res = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne); + context.LoadFromContext(); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + else if (op.Size == 1 && op.Opc == 3) // Double -> Half. + { + if (Optimizations.UseF16c) + { + Debug.Assert(!Optimizations.ForceLegacySse); + + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n); + res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest))); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0); + + context.StoreToContext(); + Operand res = context.Call(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)), ne); + context.LoadFromContext(); + + res = context.ZeroExtend16(OperandType.I64, res); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1)); + } + } + else if (op.Size == 3 && op.Opc == 1) // Half -> Double. + { + if (Optimizations.UseF16c) + { + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn)); + res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res); + res = context.VectorZeroUpper64(res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1); + + context.StoreToContext(); + Operand res = context.Call(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)), ne); + context.LoadFromContext(); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + else // Invalid encoding. 
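+ // (Editorial note: the six branches above are exactly the distinct
+ // size/opc pairs drawn from {single = 0, double = 1, half = 3}; any other
+ // combination has no conversion defined - per the A64 FCVT encoding they
+ // are unallocated - hence the assert below.)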
+ { + Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}"); + } + } + + public static void Fcvtas_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtasGp); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false); + } + else + { + EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1)); + } + } + + public static void Fcvtas_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtasS); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: true); + } + else + { + EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true); + } + } + + public static void Fcvtas_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtasS); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearestAway, scalar: false); + } + else + { + EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false); + } + } + + public static void Fcvtau_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtauGp); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvtu_Gp(context, FPRoundingMode.ToNearestAway, isFixed: false); + } + else + { + EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1)); + } + } + + public static void Fcvtau_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtauS); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: true); + } + else + { + EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true); + } + } + + public static void Fcvtau_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtauV); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearestAway, scalar: false); + } + else + { + EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false); + } + } + + public static void Fcvtl_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtlV); + } + else if (Optimizations.UseSse2 && sizeF == 1) + { + Operand n = GetVec(op.Rn); + + Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n; + res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res); + + context.Copy(GetVec(op.Rd), res); + } + else if (Optimizations.UseF16c && sizeF == 0) + { + Debug.Assert(!Optimizations.ForceLegacySse); + + Operand n = GetVec(op.Rn); + + Operand res = op.RegisterSize == RegisterSize.Simd128 ? 
context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n; + res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int elems = 4 >> sizeF; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + if (sizeF == 0) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1); + + context.StoreToContext(); + Operand e = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne); + context.LoadFromContext(); + + res = context.VectorInsert(res, e, index); + } + else /* if (sizeF == 1) */ + { + Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index); + + Operand e = context.ConvertToFP(OperandType.FP64, ne); + + res = context.VectorInsert(res, e, index); + } + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fcvtms_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmsGp); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false); + } + else + { + EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1)); + } + } + + public static void Fcvtms_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtmsV); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false); + } + else + { + EmitFcvt(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1), signed: true, scalar: false); + } + } + + public static void Fcvtmu_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtmuGp); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false); + } + else + { + EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1)); + } + } + + public static void Fcvtn_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int sizeF = op.Size & 1; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOpFRd(context, Intrinsic.Arm64FcvtnV); + } + else if (Optimizations.UseSse2 && sizeF == 1) + { + Operand d = GetVec(op.Rd); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps; + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, GetVec(op.Rn)); + nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt); + + Operand res = context.VectorZeroUpper64(d); + res = context.AddIntrinsic(movInst, res, nInt); + + context.Copy(d, res); + } + else if (Optimizations.UseF16c && sizeF == 0) + { + Debug.Assert(!Optimizations.ForceLegacySse); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? 
Intrinsic.X86Movlhps : Intrinsic.X86Movhlps; + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest))); + nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt); + + Operand res = context.VectorZeroUpper64(d); + res = context.AddIntrinsic(movInst, res, nInt); + + context.Copy(d, res); + } + else + { + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int elems = 4 >> sizeF; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand d = GetVec(op.Rd); + + Operand res = part == 0 ? context.VectorZero() : context.Copy(d); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + + if (sizeF == 0) + { + context.StoreToContext(); + Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne); + context.LoadFromContext(); + + res = EmitVectorInsert(context, res, e, part + index, 1); + } + else /* if (sizeF == 1) */ + { + Operand e = context.ConvertToFP(OperandType.FP32, ne); + + res = context.VectorInsert(res, e, part + index); + } + } + + context.Copy(d, res); + } + } + + public static void Fcvtns_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtnsGp); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false); + } + else + { + EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1)); + } + } + + public static void Fcvtns_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnsS); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true); + } + else + { + EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: true); + } + } + + public static void Fcvtns_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnsV); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false); + } + else + { + EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: false); + } + } + + public static void Fcvtnu_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtnuS); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true); + } + else + { + EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: true); + } + } + + public static void Fcvtnu_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtnuV); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false); + } + else + { + EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: false); + } + } + + public static void Fcvtps_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpsGp); + } + else if (Optimizations.UseSse41) + 
{ + EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false); + } + else + { + EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1)); + } + } + + public static void Fcvtpu_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtpuGp); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false); + } + else + { + EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1)); + } + } + + public static void Fcvtzs_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzsGp); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false); + } + else + { + EmitFcvt_s_Gp(context, (op1) => op1); + } + } + + public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzsGpFixed, op.FBits); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true); + } + else + { + EmitFcvtzs_Gp_Fixed(context); + } + } + + public static void Fcvtzs_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzsS); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true); + } + else + { + EmitFcvtz(context, signed: true, scalar: true); + } + } + + public static void Fcvtzs_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzsV); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: true, scalar: false); + } + } + + public static void Fcvtzs_V_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzsVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: true, scalar: false); + } + } + + public static void Fcvtzu_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFToGp(context, Intrinsic.Arm64FcvtzuGp); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false); + } + else + { + EmitFcvt_u_Gp(context, (op1) => op1); + } + } + + public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFToGp(context, Intrinsic.Arm64FcvtzuGpFixed, op.FBits); + } + else if (Optimizations.UseSse41) + { + EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true); + } + else + { + EmitFcvtzu_Gp_Fixed(context); + } + } + + public static void Fcvtzu_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64FcvtzuS); + } + else if 
(Optimizations.UseSse41) + { + EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true); + } + else + { + EmitFcvtz(context, signed: false, scalar: true); + } + } + + public static void Fcvtzu_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64FcvtzuV); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: false, scalar: false); + } + } + + public static void Fcvtzu_V_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64FcvtzuVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse41) + { + EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false); + } + else + { + EmitFcvtz(context, signed: false, scalar: false); + } + } + + public static void Scvtf_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64ScvtfGp); + } + else + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } + + res = EmitFPConvert(context, res, op.Size, signed: true); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Scvtf_Gp_Fixed(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64ScvtfGpFixed, op.FBits); + } + else + { + Operand res = GetIntOrZR(context, op.Rn); + + if (op.RegisterSize == RegisterSize.Int32) + { + res = context.SignExtend32(OperandType.I64, res); + } + + res = EmitFPConvert(context, res, op.Size, signed: true); + + res = EmitI2fFBitsMul(context, res, op.FBits); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Scvtf_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64ScvtfS); + } + else if (Optimizations.UseSse2) + { + EmitSse2ScvtfOp(context, scalar: true); + } + else + { + EmitCvtf(context, signed: true, scalar: true); + } + } + + public static void Scvtf_S_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64ScvtfSFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) + { + EmitSse2ScvtfOp(context, scalar: true); + } + else + { + EmitCvtf(context, signed: true, scalar: true); + } + } + + public static void Scvtf_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64ScvtfV); + } + else if (Optimizations.UseSse2) + { + EmitSse2ScvtfOp(context, scalar: false); + } + else + { + EmitCvtf(context, signed: true, scalar: false); + } + } + + public static void Scvtf_V_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64ScvtfVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) + { + EmitSse2ScvtfOp(context, scalar: false); + } + else + { + EmitCvtf(context, signed: true, scalar: false); + } + 
} + + public static void Ucvtf_Gp(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpFFromGp(context, Intrinsic.Arm64UcvtfGp); + } + else + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand res = GetIntOrZR(context, op.Rn); + + res = EmitFPConvert(context, res, op.Size, signed: false); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Ucvtf_Gp_Fixed(ArmEmitterContext context) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpFFromGp(context, Intrinsic.Arm64UcvtfGpFixed, op.FBits); + } + else + { + Operand res = GetIntOrZR(context, op.Rn); + + res = EmitFPConvert(context, res, op.Size, signed: false); + + res = EmitI2fFBitsMul(context, res, op.FBits); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0)); + } + } + + public static void Ucvtf_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarUnaryOpF(context, Intrinsic.Arm64UcvtfS); + } + else if (Optimizations.UseSse2) + { + EmitSse2UcvtfOp(context, scalar: true); + } + else + { + EmitCvtf(context, signed: false, scalar: true); + } + } + + public static void Ucvtf_S_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarConvertBinaryOpF(context, Intrinsic.Arm64UcvtfSFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) + { + EmitSse2UcvtfOp(context, scalar: true); + } + else + { + EmitCvtf(context, signed: false, scalar: true); + } + } + + public static void Ucvtf_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOpF(context, Intrinsic.Arm64UcvtfV); + } + else if (Optimizations.UseSse2) + { + EmitSse2UcvtfOp(context, scalar: false); + } + else + { + EmitCvtf(context, signed: false, scalar: false); + } + } + + public static void Ucvtf_V_Fixed(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorConvertBinaryOpF(context, Intrinsic.Arm64UcvtfVFixed, GetFBits(context)); + } + else if (Optimizations.UseSse2) + { + EmitSse2UcvtfOp(context, scalar: false); + } + else + { + EmitCvtf(context, signed: false, scalar: false); + } + } + + private static void EmitFcvt(ArmEmitterContext context, Func1I emit, bool signed, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand n = GetVec(op.Rn); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int elems = !scalar ? op.GetBytesCount() >> sizeI : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, n, index); + + Operand e = emit(ne); + + if (sizeF == 0) + { + MethodInfo info = signed + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32)); + + e = context.Call(info, e); + + e = context.ZeroExtend32(OperandType.I64, e); + } + else /* if (sizeF == 1) */ + { + MethodInfo info = signed + ? 
typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64)); + + e = context.Call(info, e); + } + + res = EmitVectorInsert(context, res, e, index, sizeI); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand n = GetVec(op.Rn); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64; + + int fBits = GetFBits(context); + + int elems = !scalar ? op.GetBytesCount() >> sizeI : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, n, index); + + Operand e = EmitF2iFBitsMul(context, ne, fBits); + + if (sizeF == 0) + { + MethodInfo info = signed + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32)); + + e = context.Call(info, e); + + e = context.ZeroExtend32(OperandType.I64, e); + } + else /* if (sizeF == 1) */ + { + MethodInfo info = signed + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64)); + + e = context.Call(info, e); + } + + res = EmitVectorInsert(context, res, e, index, sizeI); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit) + { + EmitFcvt___Gp(context, emit, signed: true); + } + + private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit) + { + EmitFcvt___Gp(context, emit, signed: false); + } + + private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + Operand res = signed + ? EmitScalarFcvts(context, emit(ne), 0) + : EmitScalarFcvtu(context, emit(ne), 0); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtz__Gp_Fixed(context, signed: true); + } + + private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context) + { + EmitFcvtz__Gp_Fixed(context, signed: false); + } + + private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + Operand res = signed + ? EmitScalarFcvts(context, ne, op.FBits) + : EmitScalarFcvtu(context, ne, op.FBits); + + SetIntOrZR(context, op.Rd, res); + } + + private static void EmitCvtf(ArmEmitterContext context, bool signed, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + int sizeI = sizeF + 2; + + int fBits = GetFBits(context); + + int elems = !scalar ? 
op.GetBytesCount() >> sizeI : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI); + + Operand e = EmitFPConvert(context, ne, sizeF, signed); + + e = EmitI2fFBitsMul(context, e, fBits); + + res = context.VectorInsert(res, e, index); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static int GetFBits(ArmEmitterContext context) + { + if (context.CurrOp is OpCodeSimdShImm op) + { + return GetImmShr(op); + } + + return 0; + } + + private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed) + { + Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64); + Debug.Assert((uint)size < 2); + + OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64; + + if (signed) + { + return context.ConvertToFP(type, value); + } + else + { + return context.ConvertToFPUI(type, value); + } + } + + private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + value = EmitF2iFBitsMul(context, value, fBits); + + MethodInfo info; + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + info = value.Type == OperandType.FP32 + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32)); + } + else + { + info = value.Type == OperandType.FP32 + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64)); + } + + return context.Call(info, value); + } + + private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + value = EmitF2iFBitsMul(context, value, fBits); + + MethodInfo info; + + if (context.CurrOp.RegisterSize == RegisterSize.Int32) + { + info = value.Type == OperandType.FP32 + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32)); + } + else + { + info = value.Type == OperandType.FP32 + ? 
typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64)); + } + + return context.Call(info, value); + } + + private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + if (fBits == 0) + { + return value; + } + + if (value.Type == OperandType.FP32) + { + return context.Multiply(value, ConstF(MathF.Pow(2f, fBits))); + } + else /* if (value.Type == OperandType.FP64) */ + { + return context.Multiply(value, ConstF(Math.Pow(2d, fBits))); + } + } + + private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64); + + if (fBits == 0) + { + return value; + } + + if (value.Type == OperandType.FP32) + { + return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits))); + } + else /* if (value.Type == OperandType.FP64) */ + { + return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits))); + } + } + + public static Operand EmitSse2CvtDoubleToInt64OpF(ArmEmitterContext context, Operand opF, bool scalar) + { + Debug.Assert(opF.Type == OperandType.V128); + + Operand longL = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opF); // opFL + Operand res = context.VectorCreateScalar(longL); + + if (!scalar) + { + Operand opFH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, opF); // res doesn't matter. + Operand longH = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opFH); + Operand resH = context.VectorCreateScalar(longH); + res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH); + } + + return res; + } + + private static Operand EmitSse2CvtInt64ToDoubleOp(ArmEmitterContext context, Operand op, bool scalar) + { + Debug.Assert(op.Type == OperandType.V128); + + Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, op); // opL + Operand res = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, context.VectorZero(), longL); + + if (!scalar) + { + Operand opH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, op); // res doesn't matter. + Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, opH); + Operand resH = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, res, longH); // res doesn't matter. + res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH); + } + + return res; + } + + private static void EmitSse2ScvtfOp(ArmEmitterContext context, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 - fBits * 0x800000; + + Operand fpScaledMask = scalar + ? 
X86GetScalar (context, fpScaled) + : X86GetAllElements(context, fpScaled); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask); + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand res = EmitSse2CvtInt64ToDoubleOp(context, n, scalar); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits) + long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L; + + Operand fpScaledMask = scalar + ? X86GetScalar (context, fpScaled) + : X86GetAllElements(context, fpScaled); + + res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask); + } + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSse2UcvtfOp(ArmEmitterContext context, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand mask = scalar // 65536.000f (1 << 16) + ? X86GetScalar (context, 0x47800000) + : X86GetAllElements(context, 0x47800000); + + Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16)); + res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res); + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16)); + res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16)); + res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2); + + res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 - fBits * 0x800000; + + Operand fpScaledMask = scalar + ? X86GetScalar (context, fpScaled) + : X86GetAllElements(context, fpScaled); + + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask); + } + + if (scalar) + { + res = context.VectorZeroUpper96(res); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else /* if (sizeF == 1) */ + { + Operand mask = scalar // 4294967296.0000000d (1L << 32) + ? X86GetScalar (context, 0x41F0000000000000L) + : X86GetAllElements(context, 0x41F0000000000000L); + + Operand res = context.AddIntrinsic (Intrinsic.X86Psrlq, n, Const(32)); + res = EmitSse2CvtInt64ToDoubleOp(context, res, scalar); + res = context.AddIntrinsic (Intrinsic.X86Mulpd, res, mask); + + Operand res2 = context.AddIntrinsic (Intrinsic.X86Psllq, n, Const(32)); + res2 = context.AddIntrinsic (Intrinsic.X86Psrlq, res2, Const(32)); + res2 = EmitSse2CvtInt64ToDoubleOp(context, res2, scalar); + + res = context.AddIntrinsic(Intrinsic.X86Addpd, res, res2); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits) + long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L; + + Operand fpScaledMask = scalar + ? 
X86GetScalar (context, fpScaled) + : X86GetAllElements(context, fpScaled); + + res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask); + } + + if (scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSse41FcvtsOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 + fBits * 0x800000; + + Operand fpScaledMask = scalar + ? X86GetScalar (context, fpScaled) + : X86GetAllElements(context, fpScaled); + + nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask); + } + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar); + } + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes); + + Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648) + ? X86GetScalar (context, 0x4F000000) + : X86GetAllElements(context, 0x4F000000); + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes); + + if (scalar) + { + dRes = context.VectorZeroUpper96(dRes); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + dRes = context.VectorZeroUpper64(dRes); + } + + context.Copy(GetVec(op.Rd), dRes); + } + else /* if (sizeF == 1) */ + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits) + long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L; + + Operand fpScaledMask = scalar + ? X86GetScalar (context, fpScaled) + : X86GetAllElements(context, fpScaled); + + nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask); + } + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar); + } + + Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar); + + Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808) + ? 
X86GetScalar (context, 0x43E0000000000000L) + : X86GetAllElements(context, 0x43E0000000000000L); + + nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes); + + if (scalar) + { + dRes = context.VectorZeroUpper64(dRes); + } + + context.Copy(GetVec(op.Rd), dRes); + } + } + + private static void EmitSse41FcvtuOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + // sizeF == ((OpCodeSimdShImm)op).Size - 2 + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits) + int fpScaled = 0x3F800000 + fBits * 0x800000; + + Operand fpScaledMask = scalar + ? X86GetScalar (context, fpScaled) + : X86GetAllElements(context, fpScaled); + + nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask); + } + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar); + } + + Operand zero = context.VectorZero(); + + Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648) + ? X86GetScalar (context, 0x4F000000) + : X86GetAllElements(context, 0x4F000000); + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes); + + nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + Operand nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes); + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes); + dRes = context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt); + + if (scalar) + { + dRes = context.VectorZeroUpper96(dRes); + } + else if (op.RegisterSize == RegisterSize.Simd64) + { + dRes = context.VectorZeroUpper64(dRes); + } + + context.Copy(GetVec(op.Rd), dRes); + } + else /* if (sizeF == 1) */ + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (op is OpCodeSimdShImm fixedOp) + { + int fBits = GetImmShr(fixedOp); + + // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits) + long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L; + + Operand fpScaledMask = scalar + ? 
X86GetScalar (context, fpScaled) + : X86GetAllElements(context, fpScaled); + + nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask); + } + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar); + } + + Operand zero = context.VectorZero(); + + Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808) + ? X86GetScalar (context, 0x43E0000000000000L) + : X86GetAllElements(context, 0x43E0000000000000L); + + Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar); + + nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + Operand nLong2 = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar); + + nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes); + dRes = context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong); + + if (scalar) + { + dRes = context.VectorZeroUpper64(dRes); + } + + context.Copy(GetVec(op.Rd), dRes); + } + } + + private static void EmitSse41Fcvts_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if (op.Size == 0) + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (isFixed) + { + // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits) + int fpScaled = 0x3F800000 + op.FBits * 0x800000; + + Operand fpScaledMask = X86GetScalar(context, fpScaled); + + nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask); + } + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true); + } + + Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32 + ? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes) + : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes); + + int fpMaxVal = op.RegisterSize == RegisterSize.Int32 + ? 
0x4F000000 // 2.14748365E9f (2147483648) + : 0x5F000000; // 9.223372E18f (9223372036854775808) + + Operand fpMaxValMask = X86GetScalar(context, fpMaxVal); + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes); + + if (op.RegisterSize == RegisterSize.Int64) + { + nInt = context.SignExtend32(OperandType.I64, nInt); + } + + Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt); + + SetIntOrZR(context, op.Rd, dRes); + } + else /* if (op.Size == 1) */ + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (isFixed) + { + // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits) + long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L; + + Operand fpScaledMask = X86GetScalar(context, fpScaled); + + nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask); + } + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true); + } + + Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32 + ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes) + : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes); + + long fpMaxVal = op.RegisterSize == RegisterSize.Int32 + ? 0x41E0000000000000L // 2147483648.0000000d (2147483648) + : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808) + + Operand fpMaxValMask = X86GetScalar(context, fpMaxVal); + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes); + + if (op.RegisterSize == RegisterSize.Int32) + { + nLong = context.ConvertI64ToI32(nLong); + } + + Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong); + + SetIntOrZR(context, op.Rd, dRes); + } + } + + private static void EmitSse41Fcvtu_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if (op.Size == 0) + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (isFixed) + { + // BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits) + int fpScaled = 0x3F800000 + op.FBits * 0x800000; + + Operand fpScaledMask = X86GetScalar(context, fpScaled); + + nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask); + } + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true); + } + + Operand zero = context.VectorZero(); + + Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + int fpMaxVal = op.RegisterSize == RegisterSize.Int32 + ? 0x4F000000 // 2.14748365E9f (2147483648) + : 0x5F000000; // 9.223372E18f (9223372036854775808) + + Operand fpMaxValMask = X86GetScalar(context, fpMaxVal); + + Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32 + ? 
context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes) + : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes); + + nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32 + ? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes) + : context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes); + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes); + + if (op.RegisterSize == RegisterSize.Int64) + { + nInt = context.SignExtend32(OperandType.I64, nInt); + } + + Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt); + dRes = context.Add(dRes, nIntOrLong); + + SetIntOrZR(context, op.Rd, dRes); + } + else /* if (op.Size == 1) */ + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (isFixed) + { + // BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits) + long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L; + + Operand fpScaledMask = X86GetScalar(context, fpScaled); + + nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask); + } + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true); + } + + Operand zero = context.VectorZero(); + + Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + long fpMaxVal = op.RegisterSize == RegisterSize.Int32 + ? 0x41E0000000000000L // 2147483648.0000000d (2147483648) + : 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808) + + Operand fpMaxValMask = X86GetScalar(context, fpMaxVal); + + Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32 + ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes) + : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes); + + nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32 + ? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes) + : context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes); + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes); + + if (op.RegisterSize == RegisterSize.Int32) + { + nLong = context.ConvertI64ToI32(nLong); + } + + Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong); + dRes = context.Add(dRes, nIntOrLong); + + SetIntOrZR(context, op.Rd, dRes); + } + } + + private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size) + { + OperandType type = size == 3 ? 
OperandType.I64 : OperandType.I32; + + return context.VectorExtract(type, GetVec(reg), index); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs new file mode 100644 index 00000000..33ae83df --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdCvt32.cs @@ -0,0 +1,800 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + private static int FlipVdBits(int vd, bool lowBit) + { + if (lowBit) + { + // Move the low bit to the top. + return ((vd & 0x1) << 4) | (vd >> 1); + } + else + { + // Move the high bit to the bottom. + return ((vd & 0xf) << 1) | (vd >> 4); + } + } + + private static Operand EmitSaturateFloatToInt(ArmEmitterContext context, Operand op1, bool unsigned) + { + MethodInfo info; + + if (op1.Type == OperandType.FP64) + { + info = unsigned + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32)); + } + else + { + info = unsigned + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32)); + } + + return context.Call(info, op1); + } + + public static void Vcvt_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + bool unsigned = (op.Opc & 1) != 0; + bool toInteger = (op.Opc & 2) != 0; + OperandType floatSize = (op.Size == 2) ? OperandType.FP32 : OperandType.FP64; + + if (toInteger) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuV : Intrinsic.Arm64FcvtzsV); + } + else if (Optimizations.UseSse41) + { + EmitSse41ConvertVector32(context, FPRoundingMode.TowardsZero, !unsigned); + } + else + { + EmitVectorUnaryOpF32(context, (op1) => + { + return EmitSaturateFloatToInt(context, op1, unsigned); + }); + } + } + else + { + if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (n) => + { + if (unsigned) + { + Operand mask = X86GetAllElements(context, 0x47800000); + + Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16)); + res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res); + res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16)); + res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16)); + res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2); + + return context.AddIntrinsic(Intrinsic.X86Addps, res, res2); + } + else + { + return context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n); + } + }); + } + else + { + if (unsigned) + { + EmitVectorUnaryOpZx32(context, (op1) => EmitFPConvert(context, op1, floatSize, false)); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => EmitFPConvert(context, op1, floatSize, true)); + } + } + } + } + + public static void Vcvt_FD(ArmEmitterContext context) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + int vm = op.Vm; + int vd; + if (op.Size == 3) + { + vd = FlipVdBits(op.Vd, false); + // Double to single. 
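+ // Singles and doubles pack the extra register-index bit at opposite ends of the encoding, which is why the destination index is re-packed with FlipVdBits above.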
+ Operand fp = ExtractScalar(context, OperandType.FP64, vm); + + Operand res = context.ConvertToFP(OperandType.FP32, fp); + + InsertScalar(context, vd, res); + } + else + { + vd = FlipVdBits(op.Vd, true); + // Single to double. + Operand fp = ExtractScalar(context, OperandType.FP32, vm); + + Operand res = context.ConvertToFP(OperandType.FP64, fp); + + InsertScalar(context, vd, res); + } + } + + // VCVT (floating-point to integer, floating-point) | VCVT (integer to floating-point, floating-point). + public static void Vcvt_FI(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + bool toInteger = (op.Opc2 & 0b100) != 0; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; + + if (toInteger) + { + bool unsigned = (op.Opc2 & 1) == 0; + bool roundWithFpscr = op.Opc != 1; + + if (!roundWithFpscr && Optimizations.UseAdvSimd) + { + bool doubleSize = floatSize == OperandType.FP64; + + if (doubleSize) + { + Operand m = GetVecA32(op.Vm >> 1); + + Operand toConvert = InstEmitSimdHelper32Arm64.EmitExtractScalar(context, m, op.Vm, doubleSize); + + Intrinsic inst = (unsigned ? Intrinsic.Arm64FcvtzuGp : Intrinsic.Arm64FcvtzsGp) | Intrinsic.Arm64VDouble; + + Operand asInteger = context.AddIntrinsicInt(inst, toConvert); + + InsertScalar(context, op.Vd, asInteger); + } + else + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, unsigned ? Intrinsic.Arm64FcvtzuS : Intrinsic.Arm64FcvtzsS); + } + } + else if (!roundWithFpscr && Optimizations.UseSse41) + { + EmitSse41ConvertInt32(context, FPRoundingMode.TowardsZero, !unsigned); + } + else + { + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + // TODO: Fast Path. + if (roundWithFpscr) + { + toConvert = EmitRoundByRMode(context, toConvert); + } + + // Round towards zero. + Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); + + InsertScalar(context, op.Vd, asInteger); + } + } + else + { + bool unsigned = op.Opc == 0; + + Operand toConvert = ExtractScalar(context, OperandType.I32, op.Vm); + + Operand asFloat = EmitFPConvert(context, toConvert, floatSize, !unsigned); + + InsertScalar(context, op.Vd, asFloat); + } + } + + private static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCode32Simd op = (IOpCode32Simd)context.CurrOp; + + string name = nameof(Math.Round); + + MethodInfo info = (op.Size & 1) == 0 + ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) }) + : typeof(Math). GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) }); + + return context.Call(info, n, Const((int)roundMode)); + } + + private static FPRoundingMode RMToRoundMode(int rm) + { + FPRoundingMode roundMode; + switch (rm) + { + case 0b00: + roundMode = FPRoundingMode.ToNearestAway; + break; + case 0b01: + roundMode = FPRoundingMode.ToNearest; + break; + case 0b10: + roundMode = FPRoundingMode.TowardsPlusInfinity; + break; + case 0b11: + roundMode = FPRoundingMode.TowardsMinusInfinity; + break; + default: + throw new ArgumentOutOfRangeException(nameof(rm)); + } + return roundMode; + } + + // VCVTA/M/N/P (floating-point). + public static void Vcvt_RM(ArmEmitterContext context) + { + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; // toInteger == true (opCode<18> == 1 => Opc2<2> == 1). + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? 
OperandType.FP64 : OperandType.FP32; + + bool unsigned = op.Opc == 0; + int rm = op.Opc2 & 3; + + Intrinsic inst; + + if (Optimizations.UseAdvSimd) + { + if (unsigned) + { + inst = rm switch { + 0b00 => Intrinsic.Arm64FcvtauS, + 0b01 => Intrinsic.Arm64FcvtnuS, + 0b10 => Intrinsic.Arm64FcvtpuS, + 0b11 => Intrinsic.Arm64FcvtmuS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + } + else + { + inst = rm switch { + 0b00 => Intrinsic.Arm64FcvtasS, + 0b01 => Intrinsic.Arm64FcvtnsS, + 0b10 => Intrinsic.Arm64FcvtpsS, + 0b11 => Intrinsic.Arm64FcvtmsS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + } + + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst); + } + else if (Optimizations.UseSse41) + { + EmitSse41ConvertInt32(context, RMToRoundMode(rm), !unsigned); + } + else + { + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (rm) + { + case 0b00: // Away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: // Nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: // Towards positive infinity + toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert); + break; + case 0b11: // Towards negative infinity + toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert); + break; + } + + Operand asInteger = EmitSaturateFloatToInt(context, toConvert, unsigned); + + InsertScalar(context, op.Vd, asInteger); + } + } + + public static void Vcvt_TB(ArmEmitterContext context) + { + OpCode32SimdCvtTB op = (OpCode32SimdCvtTB)context.CurrOp; + + if (Optimizations.UseF16c) + { + Debug.Assert(!Optimizations.ForceLegacySse); + + if (op.Op) + { + Operand res = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm); + if (op.Size == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), res); + } + res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest))); + res = context.VectorExtract16(res, 0); + InsertScalar16(context, op.Vd, op.T, res); + } + else + { + Operand res = context.VectorCreateScalar(ExtractScalar16(context, op.Vm, op.T)); + res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res); + if (op.Size == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res); + } + res = context.VectorExtract(op.Size == 1 ? OperandType.I64 : OperandType.I32, res, 0); + InsertScalar(context, op.Vd, res); + } + } + else + { + if (op.Op) + { + // Convert to half. + + Operand src = ExtractScalar(context, op.Size == 1 ? OperandType.FP64 : OperandType.FP32, op.Vm); + + MethodInfo method = op.Size == 1 + ? typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)) + : typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)); + + context.ExitArmFpMode(); + context.StoreToContext(); + Operand res = context.Call(method, src); + context.LoadFromContext(); + context.EnterArmFpMode(); + + InsertScalar16(context, op.Vd, op.T, res); + } + else + { + // Convert from half. + + Operand src = ExtractScalar16(context, op.Vm, op.T); + + MethodInfo method = op.Size == 1 + ? 
typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)) + : typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)); + + context.ExitArmFpMode(); + context.StoreToContext(); + Operand res = context.Call(method, src); + context.LoadFromContext(); + context.EnterArmFpMode(); + + InsertScalar(context, op.Vd, res); + } + } + } + + // VRINTA/M/N/P (floating-point). + public static void Vrint_RM(ArmEmitterContext context) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + OperandType floatSize = op.RegisterSize == RegisterSize.Int64 ? OperandType.FP64 : OperandType.FP32; + + int rm = op.Opc2 & 3; + + if (Optimizations.UseAdvSimd) + { + Intrinsic inst = rm switch { + 0b00 => Intrinsic.Arm64FrintaS, + 0b01 => Intrinsic.Arm64FrintnS, + 0b10 => Intrinsic.Arm64FrintpS, + 0b11 => Intrinsic.Arm64FrintmS, + _ => throw new ArgumentOutOfRangeException(nameof(rm)) + }; + + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, inst); + } + else if (Optimizations.UseSse41) + { + EmitScalarUnaryOpSimd32(context, (m) => + { + FPRoundingMode roundMode = RMToRoundMode(rm); + + if (roundMode != FPRoundingMode.ToNearestAway) + { + Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd; + return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(roundMode))); + } + else + { + return EmitSse41RoundToNearestWithTiesToAwayOpF(context, m, scalar: true); + } + }); + } + else + { + Operand toConvert = ExtractScalar(context, floatSize, op.Vm); + + switch (rm) + { + case 0b00: // Away + toConvert = EmitRoundMathCall(context, MidpointRounding.AwayFromZero, toConvert); + break; + case 0b01: // Nearest + toConvert = EmitRoundMathCall(context, MidpointRounding.ToEven, toConvert); + break; + case 0b10: // Towards positive infinity + toConvert = EmitUnaryMathCall(context, nameof(Math.Ceiling), toConvert); + break; + case 0b11: // Towards negative infinity + toConvert = EmitUnaryMathCall(context, nameof(Math.Floor), toConvert); + break; + } + + InsertScalar(context, op.Vd, toConvert); + } + } + + // VRINTA (vector). + public static void Vrinta_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintaS); + } + else + { + EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, m)); + } + } + + // VRINTM (vector). + public static void Vrintm_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintmS); + } + else if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsMinusInfinity))); + }); + } + else + { + EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Floor), m)); + } + } + + // VRINTN (vector). + public static void Vrintn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintnS); + } + else if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.ToNearest))); + }); + } + else + { + EmitVectorUnaryOpF32(context, (m) => EmitRoundMathCall(context, MidpointRounding.ToEven, m)); + } + } + + // VRINTP (vector). 
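+ // Rounds each element to an integral value towards plus infinity (ceiling), matching the TowardsPlusInfinity/Math.Ceiling paths below.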
+ public static void Vrintp_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorUnaryOpF32(context, Intrinsic.Arm64FrintpS); + } + else if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(Intrinsic.X86Roundps, m, Const(X86GetRoundControl(FPRoundingMode.TowardsPlusInfinity))); + }); + } + else + { + EmitVectorUnaryOpF32(context, (m) => EmitUnaryMathCall(context, nameof(Math.Ceiling), m)); + } + } + + // VRINTZ (floating-point). + public static void Vrint_Z(ArmEmitterContext context) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintzS); + } + else if (Optimizations.UseSse2) + { + EmitScalarUnaryOpSimd32(context, (m) => + { + Intrinsic inst = (op.Size & 1) == 0 ? Intrinsic.X86Roundss : Intrinsic.X86Roundsd; + return context.AddIntrinsic(inst, m, Const(X86GetRoundControl(FPRoundingMode.TowardsZero))); + }); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Truncate), op1)); + } + } + + // VRINTX (floating-point). + public static void Vrintx_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitScalarUnaryOpF32(context, Intrinsic.Arm64FrintxS); + } + else + { + EmitScalarUnaryOpF32(context, (op1) => + { + return EmitRoundByRMode(context, op1); + }); + } + } + + private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, OperandType type, bool signed) + { + Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64); + + if (signed) + { + return context.ConvertToFP(type, value); + } + else + { + return context.ConvertToFPUI(type, value); + } + } + + private static void EmitSse41ConvertInt32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed) + { + // A port of the similar round function in InstEmitSimdCvt. + OpCode32SimdCvtFI op = (OpCode32SimdCvtFI)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 
1 : 2; + Operand n = GetVecA32(op.Vm >> shift); + n = EmitSwapScalar(context, n, op.Vm, doubleSize); + + if (!doubleSize) + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true); + } + + Operand zero = context.VectorZero(); + + Operand nCmp; + Operand nIntOrLong2 = default; + + if (!signed) + { + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + } + + int fpMaxVal = 0x4F000000; // 2.14748365E9f (2147483648) + + Operand fpMaxValMask = X86GetScalar(context, fpMaxVal); + + Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes); + + if (!signed) + { + nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtss2si, nRes); + } + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes); + + Operand dRes; + if (signed) + { + dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt); + } + else + { + dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt); + dRes = context.Add(dRes, nIntOrLong); + } + + InsertScalar(context, op.Vd, dRes); + } + else + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + if (roundMode != FPRoundingMode.ToNearestAway) + { + nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode))); + } + else + { + nRes = EmitSse41RoundToNearestWithTiesToAwayOpF(context, nRes, scalar: true); + } + + Operand zero = context.VectorZero(); + + Operand nCmp; + Operand nIntOrLong2 = default; + + if (!signed) + { + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + } + + long fpMaxVal = 0x41E0000000000000L; // 2147483648.0000000d (2147483648) + + Operand fpMaxValMask = X86GetScalar(context, fpMaxVal); + + Operand nIntOrLong = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes); + + if (!signed) + { + nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + nIntOrLong2 = context.AddIntrinsicInt(Intrinsic.X86Cvtsd2si, nRes); + } + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes); + nLong = context.ConvertI64ToI32(nLong); + + Operand dRes; + if (signed) + { + dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong); + } + else + { + dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong); + dRes = context.Add(dRes, nIntOrLong); + } + + InsertScalar(context, op.Vd, dRes); + } + } + + 
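// Vector counterpart of EmitSse41ConvertInt32 above; the same saturation trick applies per lane: an ordered compare zeroes NaN lanes, the packed CVT returns int.MinValue for out-of-range lanes, and XORing with the NotLessThan mask flips those lanes to the saturated maximum (e.g. 0x80000000 ^ 0xFFFFFFFF == 0x7FFFFFFF), while unsigned conversions run a second pass on (value - 2^31/2^63) and add the two halves back together to cover the full unsigned range. + 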
private static void EmitSse41ConvertVector32(ArmEmitterContext context, FPRoundingMode roundMode, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + EmitVectorUnaryOpSimd32(context, (n) => + { + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode))); + + Operand zero = context.VectorZero(); + Operand nCmp; + if (!signed) + { + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + } + + Operand fpMaxValMask = X86GetAllElements(context, 0x4F000000); // 2.14748365E9f (2147483648) + + Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes); + Operand nInt2 = default; + + if (!signed) + { + nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes); + } + + nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + if (signed) + { + return context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes); + } + else + { + Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes); + return context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt); + } + } + else /* if (sizeF == 1) */ + { + Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n); + + nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode))); + + Operand zero = context.VectorZero(); + Operand nCmp; + if (!signed) + { + nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + } + + Operand fpMaxValMask = X86GetAllElements(context, 0x43E0000000000000L); // 9.2233720368547760E18d (9223372036854775808) + + Operand nLong = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false); + Operand nLong2 = default; + + if (!signed) + { + nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask); + + nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual)); + nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp); + + nLong2 = InstEmit.EmitSse2CvtDoubleToInt64OpF(context, nRes, false); + } + + nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan)); + + if (signed) + { + return context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes); + } + else + { + Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes); + return context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong); + } + } + }); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHash.cs b/src/ARMeilleure/Instructions/InstEmitSimdHash.cs new file mode 100644 index 00000000..4fb048ee --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdHash.cs @@ -0,0 +1,147 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace 
ARMeilleure.Instructions +{ + static partial class InstEmit + { +#region "Sha1" + public static void Sha1c_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashChoose)), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1h_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.FixedRotate)), ne); + + context.Copy(GetVec(op.Rd), context.VectorCreateScalar(res)); + } + + public static void Sha1m_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashMajority)), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1p_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + + Operand ne = context.VectorExtract(OperandType.I32, GetVec(op.Rn), 0); + + Operand m = GetVec(op.Rm); + + Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashParity)), d, ne, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1su0_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart1)), d, n, m); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha1su1_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res = context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart2)), d, n); + + context.Copy(GetVec(op.Rd), res); + } +#endregion + +#region "Sha256" + public static void Sha256h_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256h2_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256su0_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, n); + + context.Copy(GetVec(op.Rd), res); + } + + public static void Sha256su1_V(ArmEmitterContext context) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = 
InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m); + + context.Copy(GetVec(op.Rd), res); + } +#endregion + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs new file mode 100644 index 00000000..51334608 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdHash32.cs @@ -0,0 +1,64 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { +#region "Sha256" + public static void Sha256h_V(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand d = GetVecA32(op.Qd); + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + + Operand res = InstEmitSimdHashHelper.EmitSha256h(context, d, n, m, part2: false); + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Sha256h2_V(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand d = GetVecA32(op.Qd); + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + + Operand res = InstEmitSimdHashHelper.EmitSha256h(context, n, d, m, part2: true); + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Sha256su0_V(ArmEmitterContext context) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand d = GetVecA32(op.Qd); + Operand m = GetVecA32(op.Qm); + + Operand res = InstEmitSimdHashHelper.EmitSha256su0(context, d, m); + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Sha256su1_V(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand d = GetVecA32(op.Qd); + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + + Operand res = InstEmitSimdHashHelper.EmitSha256su1(context, d, n, m); + + context.Copy(GetVecA32(op.Qd), res); + } +#endregion + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs new file mode 100644 index 00000000..23e4948d --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdHashHelper.cs @@ -0,0 +1,56 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitSimdHashHelper + { + public static Operand EmitSha256h(ArmEmitterContext context, Operand x, Operand y, Operand w, bool part2) + { + if (Optimizations.UseSha) + { + Operand src1 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0xbb)); + Operand src2 = context.AddIntrinsic(Intrinsic.X86Shufps, y, x, Const(0x11)); + Operand w2 = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, w, w); + + Operand round2 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src1, src2, w); + Operand round4 = context.AddIntrinsic(Intrinsic.X86Sha256Rnds2, src2, round2, w2); + + Operand res = context.AddIntrinsic(Intrinsic.X86Shufps, round4, round2, Const(part2 ? 0x11 : 0xbb)); + + return res; + } + + String method = part2 ? 
nameof(SoftFallback.HashUpper) : nameof(SoftFallback.HashLower); + return context.Call(typeof(SoftFallback).GetMethod(method), x, y, w); + } + + public static Operand EmitSha256su0(ArmEmitterContext context, Operand x, Operand y) + { + if (Optimizations.UseSha) + { + return context.AddIntrinsic(Intrinsic.X86Sha256Msg1, x, y); + } + + return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1)), x, y); + } + + public static Operand EmitSha256su1(ArmEmitterContext context, Operand x, Operand y, Operand z) + { + if (Optimizations.UseSha && Optimizations.UseSsse3) + { + Operand extr = context.AddIntrinsic(Intrinsic.X86Palignr, z, y, Const(4)); + Operand tmp = context.AddIntrinsic(Intrinsic.X86Paddd, extr, x); + + Operand res = context.AddIntrinsic(Intrinsic.X86Sha256Msg2, tmp, z); + + return res; + } + + return context.Call(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2)), x, y, z); + } + } +}
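+ // Note: each helper prefers the x86 SHA intrinsics when available (Optimizations.UseSha, plus SSSE3 for the second schedule update) and otherwise falls back to the managed SoftFallback implementation.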
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs new file mode 100644 index 00000000..c44c9b4d --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper.cs @@ -0,0 +1,2088 @@ +using ARMeilleure.CodeGen.X86; +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper + { +#region "Masks" + public static readonly long[] EvenMasks = new long[] + { + 14L << 56 | 12L << 48 | 10L << 40 | 08L << 32 | 06L << 24 | 04L << 16 | 02L << 8 | 00L << 0, // B + 13L << 56 | 12L << 48 | 09L << 40 | 08L << 32 | 05L << 24 | 04L << 16 | 01L << 8 | 00L << 0, // H + 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 03L << 24 | 02L << 16 | 01L << 8 | 00L << 0 // S + }; + + public static readonly long[] OddMasks = new long[] + { + 15L << 56 | 13L << 48 | 11L << 40 | 09L << 32 | 07L << 24 | 05L << 16 | 03L << 8 | 01L << 0, // B + 15L << 56 | 14L << 48 | 11L << 40 | 10L << 32 | 07L << 24 | 06L << 16 | 03L << 8 | 02L << 0, // H + 15L << 56 | 14L << 48 | 13L << 40 | 12L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0 // S + }; + + public static readonly long ZeroMask = 128L << 56 | 128L << 48 | 128L << 40 | 128L << 32 | 128L << 24 | 128L << 16 | 128L << 8 | 128L << 0; + + public static ulong X86GetGf2p8LogicalShiftLeft(int shift) + { + ulong identity = (0b00000001UL << 56) | (0b00000010UL << 48) | (0b00000100UL << 40) | (0b00001000UL << 32) | + (0b00010000UL << 24) | (0b00100000UL << 16) | (0b01000000UL << 8) | (0b10000000UL << 0); + + return shift >= 0 ? 
identity >> (shift * 8) : identity << (-shift * 8); + } +#endregion + +#region "X86 SSE Intrinsics" + public static readonly Intrinsic[] X86PaddInstruction = new Intrinsic[] + { + Intrinsic.X86Paddb, + Intrinsic.X86Paddw, + Intrinsic.X86Paddd, + Intrinsic.X86Paddq + }; + + public static readonly Intrinsic[] X86PcmpeqInstruction = new Intrinsic[] + { + Intrinsic.X86Pcmpeqb, + Intrinsic.X86Pcmpeqw, + Intrinsic.X86Pcmpeqd, + Intrinsic.X86Pcmpeqq + }; + + public static readonly Intrinsic[] X86PcmpgtInstruction = new Intrinsic[] + { + Intrinsic.X86Pcmpgtb, + Intrinsic.X86Pcmpgtw, + Intrinsic.X86Pcmpgtd, + Intrinsic.X86Pcmpgtq + }; + + public static readonly Intrinsic[] X86PmaxsInstruction = new Intrinsic[] + { + Intrinsic.X86Pmaxsb, + Intrinsic.X86Pmaxsw, + Intrinsic.X86Pmaxsd + }; + + public static readonly Intrinsic[] X86PmaxuInstruction = new Intrinsic[] + { + Intrinsic.X86Pmaxub, + Intrinsic.X86Pmaxuw, + Intrinsic.X86Pmaxud + }; + + public static readonly Intrinsic[] X86PminsInstruction = new Intrinsic[] + { + Intrinsic.X86Pminsb, + Intrinsic.X86Pminsw, + Intrinsic.X86Pminsd + }; + + public static readonly Intrinsic[] X86PminuInstruction = new Intrinsic[] + { + Intrinsic.X86Pminub, + Intrinsic.X86Pminuw, + Intrinsic.X86Pminud + }; + + public static readonly Intrinsic[] X86PmovsxInstruction = new Intrinsic[] + { + Intrinsic.X86Pmovsxbw, + Intrinsic.X86Pmovsxwd, + Intrinsic.X86Pmovsxdq + }; + + public static readonly Intrinsic[] X86PmovzxInstruction = new Intrinsic[] + { + Intrinsic.X86Pmovzxbw, + Intrinsic.X86Pmovzxwd, + Intrinsic.X86Pmovzxdq + }; + + public static readonly Intrinsic[] X86PsllInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psllw, + Intrinsic.X86Pslld, + Intrinsic.X86Psllq + }; + + public static readonly Intrinsic[] X86PsraInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psraw, + Intrinsic.X86Psrad + }; + + public static readonly Intrinsic[] X86PsrlInstruction = new Intrinsic[] + { + 0, + Intrinsic.X86Psrlw, + Intrinsic.X86Psrld, + Intrinsic.X86Psrlq + }; + + public static readonly Intrinsic[] X86PsubInstruction = new Intrinsic[] + { + Intrinsic.X86Psubb, + Intrinsic.X86Psubw, + Intrinsic.X86Psubd, + Intrinsic.X86Psubq + }; + + public static readonly Intrinsic[] X86PunpckhInstruction = new Intrinsic[] + { + Intrinsic.X86Punpckhbw, + Intrinsic.X86Punpckhwd, + Intrinsic.X86Punpckhdq, + Intrinsic.X86Punpckhqdq + }; + + public static readonly Intrinsic[] X86PunpcklInstruction = new Intrinsic[] + { + Intrinsic.X86Punpcklbw, + Intrinsic.X86Punpcklwd, + Intrinsic.X86Punpckldq, + Intrinsic.X86Punpcklqdq + }; +#endregion + + public static void EnterArmFpMode(EmitterContext context, Func<FPState, Operand> getFpFlag) + { + if (Optimizations.UseSse2) + { + Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr); + + Operand fzTrue = getFpFlag(FPState.FzFlag); + Operand r0True = getFpFlag(FPState.RMode0Flag); + Operand r1True = getFpFlag(FPState.RMode1Flag); + + mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo))); + + mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(fzTrue, Const((int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Um | Mxcsr.Dm)), Const(0))); + + // X86 round modes in order: nearest, negative, positive, zero + // ARM round modes in order: nearest, positive, negative, zero + // Read the bits backwards to correct this. 
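+ // For example, ARM RMode 0b01 (towards plus infinity) has to become x86 RC 0b10, which is why RMode0 sets Mxcsr.Rhi and RMode1 sets Mxcsr.Rlo below.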
+ + mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r0True, Const((int)Mxcsr.Rhi), Const(0))); + mxcsr = context.BitwiseOr(mxcsr, context.ConditionalSelect(r1True, Const((int)Mxcsr.Rlo), Const(0))); + + context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr); + } + else if (Optimizations.UseAdvSimd) + { + Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr); + + Operand fzTrue = getFpFlag(FPState.FzFlag); + Operand r0True = getFpFlag(FPState.RMode0Flag); + Operand r1True = getFpFlag(FPState.RMode1Flag); + + fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1))); + + fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(fzTrue, Const((int)FPCR.Fz), Const(0))); + fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r0True, Const((int)FPCR.RMode0), Const(0))); + fpcr = context.BitwiseOr(fpcr, context.ConditionalSelect(r1True, Const((int)FPCR.RMode1), Const(0))); + + context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr); + + // TODO: Restore FPSR + } + } + + public static void ExitArmFpMode(EmitterContext context, Action<FPState, Operand> setFpFlag) + { + if (Optimizations.UseSse2) + { + Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr); + + // Unset round mode (to nearest) and ftz. + mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)(Mxcsr.Ftz | Mxcsr.Daz | Mxcsr.Rhi | Mxcsr.Rlo))); + + context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr); + + // Status flags would be stored here if they were used. + } + else if (Optimizations.UseAdvSimd) + { + Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr); + + // Unset round mode (to nearest) and fz. + fpcr = context.BitwiseAnd(fpcr, Const(~(int)(FPCR.Fz | FPCR.RMode0 | FPCR.RMode1))); + + context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr); + + // TODO: Store FPSR + } + } + + public static int GetImmShl(OpCodeSimdShImm op) + { + return op.Imm - (8 << op.Size); + } + + public static int GetImmShr(OpCodeSimdShImm op) + { + return (8 << (op.Size + 1)) - op.Imm; + } + + public static Operand X86GetScalar(ArmEmitterContext context, float value) + { + return X86GetScalar(context, BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, double value) + { + return X86GetScalar(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, int value) + { + return context.VectorCreateScalar(Const(value)); + } + + public static Operand X86GetScalar(ArmEmitterContext context, long value) + { + return context.VectorCreateScalar(Const(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, float value) + { + return X86GetAllElements(context, BitConverter.SingleToInt32Bits(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, double value) + { + return X86GetAllElements(context, BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, short value) + { + ulong value1 = (ushort)value; + ulong value2 = value1 << 16 | value1; + ulong value4 = value2 << 32 | value2; + + return X86GetAllElements(context, (long)value4); + } + + public static Operand X86GetAllElements(ArmEmitterContext context, int value) + { + Operand vector = context.VectorCreateScalar(Const(value)); + + vector = context.AddIntrinsic(Intrinsic.X86Shufps, vector, vector, Const(0)); + + return vector; + } + + public static Operand X86GetAllElements(ArmEmitterContext context, long value) + { + Operand 
vector = context.VectorCreateScalar(Const(value)); + + vector = context.AddIntrinsic(Intrinsic.X86Movlhps, vector, vector); + + return vector; + } + + public static Operand X86GetElements(ArmEmitterContext context, long e1, long e0) + { + return X86GetElements(context, (ulong)e1, (ulong)e0); + } + + public static Operand X86GetElements(ArmEmitterContext context, ulong e1, ulong e0) + { + Operand vector0 = context.VectorCreateScalar(Const(e0)); + Operand vector1 = context.VectorCreateScalar(Const(e1)); + + return context.AddIntrinsic(Intrinsic.X86Punpcklqdq, vector0, vector1); + } + + public static int X86GetRoundControl(FPRoundingMode roundMode) + { + switch (roundMode) + { + case FPRoundingMode.ToNearest: return 8 | 0; // even + case FPRoundingMode.TowardsPlusInfinity: return 8 | 2; + case FPRoundingMode.TowardsMinusInfinity: return 8 | 1; + case FPRoundingMode.TowardsZero: return 8 | 3; + } + + throw new ArgumentException($"Invalid rounding mode \"{roundMode}\"."); + } + + public static Operand EmitSse41RoundToNearestWithTiesToAwayOpF(ArmEmitterContext context, Operand n, bool scalar) + { + Debug.Assert(n.Type == OperandType.V128); + + Operand nCopy = context.Copy(n); + + Operand rC = Const(X86GetRoundControl(FPRoundingMode.TowardsZero)); + + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + if ((op.Size & 1) == 0) + { + Operand signMask = scalar ? X86GetScalar(context, int.MinValue) : X86GetAllElements(context, int.MinValue); + signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy); + + // 0x3EFFFFFF == BitConverter.SingleToInt32Bits(0.5f) - 1 + Operand valueMask = scalar ? X86GetScalar(context, 0x3EFFFFFF) : X86GetAllElements(context, 0x3EFFFFFF); + valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask); + + nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addss : Intrinsic.X86Addps, nCopy, valueMask); + + nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundss : Intrinsic.X86Roundps, nCopy, rC); + } + else + { + Operand signMask = scalar ? X86GetScalar(context, long.MinValue) : X86GetAllElements(context, long.MinValue); + signMask = context.AddIntrinsic(Intrinsic.X86Pand, signMask, nCopy); + + // 0x3FDFFFFFFFFFFFFFL == BitConverter.DoubleToInt64Bits(0.5d) - 1L + Operand valueMask = scalar ? X86GetScalar(context, 0x3FDFFFFFFFFFFFFFL) : X86GetAllElements(context, 0x3FDFFFFFFFFFFFFFL); + valueMask = context.AddIntrinsic(Intrinsic.X86Por, valueMask, signMask); + + nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Addsd : Intrinsic.X86Addpd, nCopy, valueMask); + + nCopy = context.AddIntrinsic(scalar ? Intrinsic.X86Roundsd : Intrinsic.X86Roundpd, nCopy, rC); + } + + return nCopy; + } + + public static Operand EmitCountSetBits8(ArmEmitterContext context, Operand op) // "size" is 8 (SIMD&FP Inst.). + { + Debug.Assert(op.Type == OperandType.I32 || op.Type == OperandType.I64); + + Operand op0 = context.Subtract(op, context.BitwiseAnd(context.ShiftRightUI(op, Const(1)), Const(op.Type, 0x55L))); + + Operand c1 = Const(op.Type, 0x33L); + Operand op1 = context.Add(context.BitwiseAnd(context.ShiftRightUI(op0, Const(2)), c1), context.BitwiseAnd(op0, c1)); + + return context.BitwiseAnd(context.Add(op1, context.ShiftRightUI(op1, Const(4))), Const(op.Type, 0x0fL)); + } + + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? 
inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if ((op.Size & 1) != 0) + { + res = context.VectorZeroUpper64(res); + } + else + { + res = context.VectorZeroUpper96(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + Operand res = context.AddIntrinsic(inst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static Operand EmitUnaryMathCall(ArmEmitterContext context, string name, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + MethodInfo info = (op.Size & 1) == 0 + ? typeof(MathF).GetMethod(name, new Type[] { typeof(float) }) + : typeof(Math). GetMethod(name, new Type[] { typeof(double) }); + + return context.Call(info, n); + } + + public static Operand EmitRoundMathCall(ArmEmitterContext context, MidpointRounding roundMode, Operand n) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + string name = nameof(Math.Round); + + MethodInfo info = (op.Size & 1) == 0 + ? typeof(MathF).GetMethod(name, new Type[] { typeof(float), typeof(MidpointRounding) }) + : typeof(Math). 
GetMethod(name, new Type[] { typeof(double), typeof(MidpointRounding) }); + + return context.Call(info, n, Const((int)roundMode)); + } + + public static Operand EmitGetRoundingMode(ArmEmitterContext context) + { + Operand rMode = context.ShiftLeft(GetFpFlag(FPState.RMode1Flag), Const(1)); + rMode = context.BitwiseOr(rMode, GetFpFlag(FPState.RMode0Flag)); + + return rMode; + } + + public static Operand EmitRoundByRMode(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.FP32 || op.Type == OperandType.FP64); + + Operand lbl1 = Label(); + Operand lbl2 = Label(); + Operand lbl3 = Label(); + Operand lblEnd = Label(); + + Operand rN = Const((int)FPRoundingMode.ToNearest); + Operand rP = Const((int)FPRoundingMode.TowardsPlusInfinity); + Operand rM = Const((int)FPRoundingMode.TowardsMinusInfinity); + + Operand res = context.AllocateLocal(op.Type); + + Operand rMode = EmitGetRoundingMode(context); + + context.BranchIf(lbl1, rMode, rN, Comparison.NotEqual); + context.Copy(res, EmitRoundMathCall(context, MidpointRounding.ToEven, op)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lbl2, rMode, rP, Comparison.NotEqual); + context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Ceiling), op)); + context.Branch(lblEnd); + + context.MarkLabel(lbl2); + context.BranchIf(lbl3, rMode, rM, Comparison.NotEqual); + context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Floor), op)); + context.Branch(lblEnd); + + context.MarkLabel(lbl3); + context.Copy(res, EmitUnaryMathCall(context, nameof(Math.Truncate), op)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + public static Operand EmitSoftFloatCall(ArmEmitterContext context, string name, params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + MethodInfo info = (op.Size & 1) == 0 + ? typeof(SoftFloat32).GetMethod(name) + : typeof(SoftFloat64).GetMethod(name); + + context.ExitArmFpMode(); + context.StoreToContext(); + Operand res = context.Call(info, callArgs); + context.LoadFromContext(); + context.EnterArmFpMode(); + + return res; + } + + public static void EmitScalarBinaryOpByElemF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryOpByElemF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? 
OperandType.FP64 : OperandType.FP32; + + Operand d = context.VectorExtract(type, GetVec(op.Rd), 0); + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(d, n, m), 0)); + } + + public static void EmitScalarUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractSx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractSx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpZx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + Operand d = EmitVectorInsert(context, context.VectorZero(), emit(n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = EmitVectorExtractZx(context, op.Rd, 0, op.Size); + Operand n = EmitVectorExtractZx(context, op.Rn, 0, op.Size); + Operand m = EmitVectorExtractZx(context, op.Rm, 0, op.Size); + + d = EmitVectorInsert(context, context.VectorZero(), emit(d, n, m), 0, op.Size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n), 0)); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(n, m), 0)); + } + + public static void EmitScalarTernaryRaOpF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? 
OperandType.FP64 : OperandType.FP32; + + Operand a = context.VectorExtract(type, GetVec(op.Ra), 0); + Operand n = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand m = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), emit(a, n, m), 0)); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + + res = context.VectorInsert(res, emit(ne), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVec(op.Rd), index); + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + res = context.VectorInsert(res, emit(ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemF(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVec(op.Rd), index); + Operand ne = context.VectorExtract(type, GetVec(op.Rn), index); + Operand me = context.VectorExtract(type, GetVec(op.Rm), op.Index); + + res = context.VectorInsert(res, emit(de, ne, me), index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpSx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractSx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractSx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorUnaryOpZx(ArmEmitterContext context, Func1I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, index, op.Size); + + res = EmitVectorInsert(context, res, 
emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractSx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtractZx(context, op.Rm, op.Index, op.Size); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmUnaryOp(ArmEmitterContext context, Func1I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, emit(imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorImmBinaryOp(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + res = EmitVectorInsert(context, res, emit(de, imm), index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRmBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRmBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmBinaryOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmBinaryOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenRnRmBinaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmBinaryOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenRnRmTernaryOpSx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: true); + } + + public static void EmitVectorWidenRnRmTernaryOpZx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenRnRmTernaryOp(context, emit, signed: false); + } + + private static void EmitVectorWidenRnRmTernaryOp(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + Operand me = EmitVectorExtract(context, op.Rm, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenBinaryOpByElemZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorWidenBinaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenBinaryOpByElem(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? 
elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorWidenTernaryOpByElemSx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: true); + } + + public static void EmitVectorWidenTernaryOpByElemZx(ArmEmitterContext context, Func3I emit) + { + EmitVectorWidenTernaryOpByElem(context, emit, signed: false); + } + + private static void EmitVectorWidenTernaryOpByElem(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand res = context.VectorZero(); + + Operand me = EmitVectorExtract(context, op.Rm, op.Index, op.Size, signed); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size + 1, signed); + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitVectorPairwiseOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: true); + } + + public static void EmitVectorPairwiseOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorPairwiseOp(context, emit, signed: false); + } + + private static void EmitVectorPairwiseOp(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = EmitVectorExtract(context, op.Rn, pairIndex, op.Size, signed); + Operand n1 = EmitVectorExtract(context, op.Rn, pairIndex + 1, op.Size, signed); + + Operand m0 = EmitVectorExtract(context, op.Rm, pairIndex, op.Size, signed); + Operand m1 = EmitVectorExtract(context, op.Rm, pairIndex + 1, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(n0, n1), index, op.Size); + res = EmitVectorInsert(context, res, emit(m0, m1), pairs + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitSsse3VectorPairwiseOp(ArmEmitterContext context, Intrinsic[] inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd64) + { + Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]); + Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks [op.Size]); + + Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n + + Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n + Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right)); + } + else if (op.Size < 3) + { + Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]); + + Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n + Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even 
from m + + Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM); + Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[op.Size], left, right)); + } + else + { + Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); + Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst[3], left, right)); + } + } + + public static void EmitVectorAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: false); + } + + public static void EmitVectorAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: false); + } + + public static void EmitVectorLongAcrossVectorOpSx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: true, isLong: true); + } + + public static void EmitVectorLongAcrossVectorOpZx(ArmEmitterContext context, Func2I emit) + { + EmitVectorAcrossVectorOp(context, emit, signed: false, isLong: true); + } + + private static void EmitVectorAcrossVectorOp( + ArmEmitterContext context, + Func2I emit, + bool signed, + bool isLong) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + + Operand res = EmitVectorExtract(context, op.Rn, 0, op.Size, signed); + + for (int index = 1; index < elems; index++) + { + Operand n = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + res = emit(res, n); + } + + int size = isLong ? op.Size + 1 : op.Size; + + Operand d = EmitVectorInsert(context, context.VectorZero(), res, 0, size); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitVectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128); + + Operand res = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0); + + for (int index = 1; index < 4; index++) + { + Operand n = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), index); + + res = emit(res, n); + } + + Operand d = context.VectorInsert(context.VectorZero(), res, 0); + + context.Copy(GetVec(op.Rd), d); + } + + public static void EmitSse2VectorAcrossVectorOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Debug.Assert((op.Size & 1) == 0 && op.RegisterSize == RegisterSize.Simd128); + + const int sm0 = 0 << 6 | 0 << 4 | 0 << 2 | 0 << 0; + const int sm1 = 1 << 6 | 1 << 4 | 1 << 2 | 1 << 0; + const int sm2 = 2 << 6 | 2 << 4 | 2 << 2 | 2 << 0; + const int sm3 = 3 << 6 | 3 << 4 | 3 << 2 | 3 << 0; + + Operand nCopy = context.Copy(GetVec(op.Rn)); + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm0)); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm1)); + Operand part2 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm2)); + Operand part3 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, nCopy, Const(sm3)); + + Operand res = emit(emit(part0, part1), emit(part2, part3)); + + context.Copy(GetVec(op.Rd), context.VectorZeroUpper96(res)); + } + + public static void EmitScalarPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? 
OperandType.FP64 : OperandType.FP32; + + Operand ne0 = context.VectorExtract(type, GetVec(op.Rn), 0); + Operand ne1 = context.VectorExtract(type, GetVec(op.Rn), 1); + + Operand res = context.VectorInsert(context.VectorZero(), emit(ne0, ne1), 0); + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitSse2ScalarPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand op0, op1; + + if ((op.Size & 1) == 0) + { + const int sm0 = 2 << 6 | 2 << 4 | 2 << 2 | 0 << 0; + const int sm1 = 2 << 6 | 2 << 4 | 2 << 2 | 1 << 0; + + Operand zeroN = context.VectorZeroUpper64(n); + + op0 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm0)); + op1 = context.AddIntrinsic(Intrinsic.X86Pshufd, zeroN, Const(sm1)); + } + else /* if ((op.Size & 1) == 1) */ + { + Operand zero = context.VectorZero(); + + op0 = context.AddIntrinsic(Intrinsic.X86Movlhps, n, zero); + op1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, n); + } + + context.Copy(GetVec(op.Rd), emit(op0, op1)); + } + + public static void EmitVectorPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int pairs = op.GetPairsCount() >> sizeF + 2; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n0 = context.VectorExtract(type, GetVec(op.Rn), pairIndex); + Operand n1 = context.VectorExtract(type, GetVec(op.Rn), pairIndex + 1); + + Operand m0 = context.VectorExtract(type, GetVec(op.Rm), pairIndex); + Operand m1 = context.VectorExtract(type, GetVec(op.Rm), pairIndex + 1); + + res = context.VectorInsert(res, emit(n0, n1), index); + res = context.VectorInsert(res, emit(m0, m1), pairs + index); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitSse2VectorPairwiseOpF(ArmEmitterContext context, Func2I emit) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand nCopy = context.Copy(GetVec(op.Rn)); + Operand mCopy = context.Copy(GetVec(op.Rm)); + + int sizeF = op.Size & 1; + + if (sizeF == 0) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, nCopy, mCopy); + + Operand zero = context.VectorZero(); + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Movlhps, unpck, zero); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, zero, unpck); + + context.Copy(GetVec(op.Rd), emit(part0, part1)); + } + else /* if (op.RegisterSize == RegisterSize.Simd128) */ + { + const int sm0 = 2 << 6 | 0 << 4 | 2 << 2 | 0 << 0; + const int sm1 = 3 << 6 | 1 << 4 | 3 << 2 | 1 << 0; + + Operand part0 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm0)); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Shufps, nCopy, mCopy, Const(sm1)); + + context.Copy(GetVec(op.Rd), emit(part0, part1)); + } + } + else /* if (sizeF == 1) */ + { + Operand part0 = context.AddIntrinsic(Intrinsic.X86Unpcklpd, nCopy, mCopy); + Operand part1 = context.AddIntrinsic(Intrinsic.X86Unpckhpd, nCopy, mCopy); + + context.Copy(GetVec(op.Rd), emit(part0, part1)); + } + } + + public enum CmpCondition + { + // Legacy Sse. + Equal = 0, // Ordered, non-signaling. + LessThan = 1, // Ordered, signaling. + LessThanOrEqual = 2, // Ordered, signaling. + UnorderedQ = 3, // Non-signaling. + NotLessThan = 5, // Unordered, signaling. 
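+ // (These values are the raw x86 CMPPS/CMPPD predicate immediates; value 4, NotEqual, is unused here.)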
+ NotLessThanOrEqual = 6, // Unordered, signaling. + OrderedQ = 7, // Non-signaling. + + // Vex. + GreaterThanOrEqual = 13, // Ordered, signaling. + GreaterThan = 14, // Ordered, signaling. + OrderedS = 23 // Signaling. + } + + [Flags] + public enum SaturatingFlags + { + None = 0, + + ByElem = 1 << 0, + Scalar = 1 << 1, + Signed = 1 << 2, + + Add = 1 << 3, + Sub = 1 << 4, + + Accumulate = 1 << 5 + } + + public static void EmitScalarSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed); + } + + public static void EmitVectorSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit) + { + EmitSaturatingUnaryOpSx(context, emit, SaturatingFlags.Signed); + } + + public static void EmitSaturatingUnaryOpSx(ArmEmitterContext context, Func1I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractSx(context, op.Rn, index, op.Size); + Operand de; + + if (op.Size <= 2) + { + de = EmitSignedSrcSatQ(context, emit(ne), op.Size, signedDst: true); + } + else /* if (op.Size == 3) */ + { + de = EmitUnarySignedSatQAbsOrNeg(context, emit(ne)); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + public static void EmitScalarSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None) + { + EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Scalar | SaturatingFlags.Signed | flags); + } + + public static void EmitScalarSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, SaturatingFlags.Scalar | flags); + } + + public static void EmitVectorSaturatingBinaryOpSx(ArmEmitterContext context, Func2I emit = null, SaturatingFlags flags = SaturatingFlags.None) + { + EmitSaturatingBinaryOp(context, emit, SaturatingFlags.Signed | flags); + } + + public static void EmitVectorSaturatingBinaryOpZx(ArmEmitterContext context, SaturatingFlags flags) + { + EmitSaturatingBinaryOp(context, null, flags); + } + + public static void EmitVectorSaturatingBinaryOpByElemSx(ArmEmitterContext context, Func2I emit) + { + EmitSaturatingBinaryOp(context, emit, SaturatingFlags.ByElem | SaturatingFlags.Signed); + } + + public static void EmitSaturatingBinaryOp(ArmEmitterContext context, Func2I emit, SaturatingFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + bool byElem = (flags & SaturatingFlags.ByElem) != 0; + bool scalar = (flags & SaturatingFlags.Scalar) != 0; + bool signed = (flags & SaturatingFlags.Signed) != 0; + + bool add = (flags & SaturatingFlags.Add) != 0; + bool sub = (flags & SaturatingFlags.Sub) != 0; + + bool accumulate = (flags & SaturatingFlags.Accumulate) != 0; + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + if (add || sub) + { + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + Operand me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = add ? 
context.Add(ne, me) : context.Subtract(ne, me); + + de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed); + } + else /* if (op.Size == 3) */ + { + if (add) + { + de = signed ? EmitBinarySignedSatQAdd(context, ne, me) : EmitBinaryUnsignedSatQAdd(context, ne, me); + } + else /* if (sub) */ + { + de = signed ? EmitBinarySignedSatQSub(context, ne, me) : EmitBinaryUnsignedSatQSub(context, ne, me); + } + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else if (accumulate) + { + for (int index = 0; index < elems; index++) + { + Operand de; + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, !signed); + Operand me = EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + if (op.Size <= 2) + { + Operand temp = context.Add(ne, me); + + de = EmitSignedSrcSatQ(context, temp, op.Size, signedDst: signed); + } + else /* if (op.Size == 3) */ + { + de = signed ? EmitBinarySignedSatQAcc(context, ne, me) : EmitBinaryUnsignedSatQAcc(context, ne, me); + } + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + else + { + Operand me = default; + + if (byElem) + { + OpCodeSimdRegElem opRegElem = (OpCodeSimdRegElem)op; + + me = EmitVectorExtract(context, opRegElem.Rm, opRegElem.Index, op.Size, signed); + } + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + if (!byElem) + { + me = EmitVectorExtract(context, ((OpCodeSimdReg)op).Rm, index, op.Size, signed); + } + + Operand de = EmitSignedSrcSatQ(context, emit(ne, me), op.Size, signedDst: signed); + + res = EmitVectorInsert(context, res, de, index, op.Size); + } + } + + context.Copy(GetVec(op.Rd), res); + } + + [Flags] + public enum SaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + public static void EmitSaturatingNarrowOp(ArmEmitterContext context, SaturatingNarrowFlags flags) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + bool scalar = (flags & SaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & SaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & SaturatingNarrowFlags.SignedDst) != 0; + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + Operand d = GetVec(op.Rd); + + Operand res = part == 0 ? context.VectorZero() : context.Copy(d); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + Operand temp = signedSrc + ? 
EmitSignedSrcSatQ(context, ne, op.Size, signedDst) + : EmitUnsignedSrcSatQ(context, ne, op.Size, signedDst); + + res = EmitVectorInsert(context, res, temp, part + index, op.Size); + } + + context.Copy(d, res); + } + + // long SignedSignSatQ(long op, int size); + public static Operand EmitSignedSignSatQ(ArmEmitterContext context, Operand op, int size) + { + int eSize = 8 << size; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64); + + Operand lbl1 = Label(); + Operand lblEnd = Label(); + + Operand zeroL = Const(0L); + Operand maxT = Const((1L << (eSize - 1)) - 1L); + Operand minT = Const(-(1L << (eSize - 1))); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL); + + context.BranchIf(lbl1, op, zeroL, Comparison.LessOrEqual); + context.Copy(res, maxT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lblEnd, op, zeroL, Comparison.GreaterOrEqual); + context.Copy(res, minT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // ulong UnsignedSignSatQ(ulong op, int size); + public static Operand EmitUnsignedSignSatQ(ArmEmitterContext context, Operand op, int size) + { + int eSize = 8 << size; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64); + + Operand lblEnd = Label(); + + Operand zeroUL = Const(0UL); + Operand maxT = Const(ulong.MaxValue >> (64 - eSize)); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL); + + context.BranchIf(lblEnd, op, zeroUL, Comparison.LessOrEqualUI); + context.Copy(res, maxT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // TSrc (16bit, 32bit, 64bit; signed) > TDst (8bit, 16bit, 32bit; signed, unsigned). + // long SignedSrcSignedDstSatQ(long op, int size); ulong SignedSrcUnsignedDstSatQ(long op, int size); + public static Operand EmitSignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst) + { + int eSizeDst = 8 << sizeDst; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32); + + Operand lbl1 = Label(); + Operand lblEnd = Label(); + + Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL); + Operand minT = signedDst ? Const(-(1L << (eSizeDst - 1))) : Const(0UL); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); + + context.BranchIf(lbl1, op, maxT, Comparison.LessOrEqual); + context.Copy(res, maxT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lblEnd, op, minT, Comparison.GreaterOrEqual); + context.Copy(res, minT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // TSrc (16bit, 32bit, 64bit; unsigned) > TDst (8bit, 16bit, 32bit; signed, unsigned).
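+ // For example, narrowing unsigned 0x1FF to an unsigned byte (sizeDst == 0) clamps the result to 0xFF and sets FPSR.QC; values at or below the destination maximum pass through unchanged.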
+ // long UnsignedSrcSignedDstSatQ(ulong op, int size); ulong UnsignedSrcUnsignedDstSatQ(ulong op, int size); + public static Operand EmitUnsignedSrcSatQ(ArmEmitterContext context, Operand op, int sizeDst, bool signedDst) + { + int eSizeDst = 8 << sizeDst; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(eSizeDst == 8 || eSizeDst == 16 || eSizeDst == 32); + + Operand lblEnd = Label(); + + Operand maxT = signedDst ? Const((1L << (eSizeDst - 1)) - 1L) : Const((1UL << eSizeDst) - 1UL); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); + + context.BranchIf(lblEnd, op, maxT, Comparison.LessOrEqualUI); + context.Copy(res, maxT); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long UnarySignedSatQAbsOrNeg(long op); + private static Operand EmitUnarySignedSatQAbsOrNeg(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand minL = Const(long.MinValue); + Operand maxL = Const(long.MaxValue); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); + + context.BranchIf(lblEnd, op, minL, Comparison.NotEqual); + context.Copy(res, maxL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long BinarySignedSatQAdd(long op1, long op2); + public static Operand EmitBinarySignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand minL = Const(long.MinValue); + Operand maxL = Const(long.MaxValue); + Operand zeroL = Const(0L); + + Operand add = context.Add(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add); + + // Signed overflow occurred iff op1 and op2 share a sign and the sum's sign differs; the mask below is negative exactly in that case. + Operand left = context.BitwiseNot(context.BitwiseExclusiveOr(op1, op2)); + Operand right = context.BitwiseExclusiveOr(op1, add); + context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual); + + Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL); + context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL)); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // ulong BinaryUnsignedSatQAdd(ulong op1, ulong op2); + public static Operand EmitBinaryUnsignedSatQAdd(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand maxUL = Const(ulong.MaxValue); + + Operand add = context.Add(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add); + + context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI); + context.Copy(res, maxUL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long BinarySignedSatQSub(long op1, long op2); + public static Operand EmitBinarySignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand minL = Const(long.MinValue); + Operand maxL = Const(long.MaxValue); + Operand zeroL = Const(0L); + + Operand sub = context.Subtract(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub); + + // Symmetrically, signed overflow occurred iff op1 and op2 differ in sign and the difference's sign differs from op1. + Operand left = 
context.BitwiseExclusiveOr(op1, op2); + Operand right = context.BitwiseExclusiveOr(op1, sub); + context.BranchIf(lblEnd, context.BitwiseAnd(left, right), zeroL, Comparison.GreaterOrEqual); + + Operand isPositive = context.ICompareGreaterOrEqual(op1, zeroL); + context.Copy(res, context.ConditionalSelect(isPositive, maxL, minL)); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // ulong BinaryUnsignedSatQSub(ulong op1, ulong op2); + public static Operand EmitBinaryUnsignedSatQSub(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lblEnd = Label(); + + Operand zeroL = Const(0L); + + Operand sub = context.Subtract(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sub); + + context.BranchIf(lblEnd, op1, op2, Comparison.GreaterOrEqualUI); + context.Copy(res, zeroL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long BinarySignedSatQAcc(ulong op1, long op2); + private static Operand EmitBinarySignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lbl1 = Label(); + Operand lbl2 = Label(); + Operand lblEnd = Label(); + + Operand maxL = Const(long.MaxValue); + Operand zeroL = Const(0L); + + Operand add = context.Add(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add); + + context.BranchIf(lbl1, op1, maxL, Comparison.GreaterUI); + Operand notOp2AndRes = context.BitwiseAnd(context.BitwiseNot(op2), add); + context.BranchIf(lblEnd, notOp2AndRes, zeroL, Comparison.GreaterOrEqual); + context.Copy(res, maxL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lbl2, op2, zeroL, Comparison.Less); + context.Copy(res, maxL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl2); + context.BranchIf(lblEnd, add, maxL, Comparison.LessOrEqualUI); + context.Copy(res, maxL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // ulong BinaryUnsignedSatQAcc(long op1, ulong op2); + private static Operand EmitBinaryUnsignedSatQAcc(ArmEmitterContext context, Operand op1, Operand op2) + { + Debug.Assert(op1.Type == OperandType.I64 && op2.Type == OperandType.I64); + + Operand lbl1 = Label(); + Operand lblEnd = Label(); + + Operand maxUL = Const(ulong.MaxValue); + Operand maxL = Const(long.MaxValue); + Operand zeroL = Const(0L); + + Operand add = context.Add(op1, op2); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), add); + + context.BranchIf(lbl1, op1, zeroL, Comparison.Less); + context.BranchIf(lblEnd, add, op1, Comparison.GreaterOrEqualUI); + context.Copy(res, maxUL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lblEnd, op2, maxL, Comparison.GreaterUI); + context.BranchIf(lblEnd, add, zeroL, Comparison.GreaterOrEqual); + context.Copy(res, zeroL); + SetFpFlag(context, FPState.QcFlag, Const(1)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + public static Operand EmitFloatAbs(ArmEmitterContext context, Operand value, bool single, bool vector) + { + Operand mask; + if (single) + { + 
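+ // -0f (and -0d below) is just the sign bit, so the Andnps/Andnpd at the end clears it, computing |value| without a branch.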
mask = vector ? X86GetAllElements(context, -0f) : X86GetScalar(context, -0f); + } + else + { + mask = vector ? X86GetAllElements(context, -0d) : X86GetScalar(context, -0d); + } + + return context.AddIntrinsic(single ? Intrinsic.X86Andnps : Intrinsic.X86Andnpd, mask, value); + } + + public static Operand EmitVectorExtractSx(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, true); + } + + public static Operand EmitVectorExtractZx(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract(context, reg, index, size, false); + } + + public static Operand EmitVectorExtract(ArmEmitterContext context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + Operand res = default; + + switch (size) + { + case 0: + res = context.VectorExtract8(GetVec(reg), index); + break; + + case 1: + res = context.VectorExtract16(GetVec(reg), index); + break; + + case 2: + res = context.VectorExtract(OperandType.I32, GetVec(reg), index); + break; + + case 3: + res = context.VectorExtract(OperandType.I64, GetVec(reg), index); + break; + } + + if (signed) + { + switch (size) + { + case 0: res = context.SignExtend8 (OperandType.I64, res); break; + case 1: res = context.SignExtend16(OperandType.I64, res); break; + case 2: res = context.SignExtend32(OperandType.I64, res); break; + } + } + else + { + switch (size) + { + case 0: res = context.ZeroExtend8 (OperandType.I64, res); break; + case 1: res = context.ZeroExtend16(OperandType.I64, res); break; + case 2: res = context.ZeroExtend32(OperandType.I64, res); break; + } + } + + return res; + } + + public static Operand EmitVectorInsert(ArmEmitterContext context, Operand vector, Operand value, int index, int size) + { + ThrowIfInvalid(index, size); + + if (size < 3 && value.Type == OperandType.I64) + { + value = context.ConvertI64ToI32(value); + } + + switch (size) + { + case 0: vector = context.VectorInsert8 (vector, value, index); break; + case 1: vector = context.VectorInsert16(vector, value, index); break; + case 2: vector = context.VectorInsert (vector, value, index); break; + case 3: vector = context.VectorInsert (vector, value, index); break; + } + + return vector; + } + + public static void ThrowIfInvalid(int index, int size) + { + if ((uint)size > 3u) + { + throw new ArgumentOutOfRangeException(nameof(size)); + } + + if ((uint)index >= 16u >> size) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs new file mode 100644 index 00000000..36d27d42 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32.cs @@ -0,0 +1,1286 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper32 + { + public static (int, int) GetQuadwordAndSubindex(int index, RegisterSize size) + { + switch (size) + { + case RegisterSize.Simd128: + return (index >> 1, 0); + case RegisterSize.Simd64: + 
case RegisterSize.Int64: + return (index >> 1, index & 1); + case RegisterSize.Int32: + return (index >> 2, index & 3); + } + + throw new ArgumentException("Unrecognized Vector Register Size."); + } + + public static Operand ExtractScalar(ArmEmitterContext context, OperandType type, int reg) + { + Debug.Assert(type != OperandType.V128); + + if (type == OperandType.FP64 || type == OperandType.I64) + { + // From dreg. + return context.VectorExtract(type, GetVecA32(reg >> 1), reg & 1); + } + else + { + // From sreg. + return context.VectorExtract(type, GetVecA32(reg >> 2), reg & 3); + } + } + + public static void InsertScalar(ArmEmitterContext context, int reg, Operand value) + { + Debug.Assert(value.Type != OperandType.V128); + + Operand vec, insert; + if (value.Type == OperandType.FP64 || value.Type == OperandType.I64) + { + // From dreg. + vec = GetVecA32(reg >> 1); + insert = context.VectorInsert(vec, value, reg & 1); + } + else + { + // From sreg. + vec = GetVecA32(reg >> 2); + insert = context.VectorInsert(vec, value, reg & 3); + } + + context.Copy(vec, insert); + } + + public static Operand ExtractScalar16(ArmEmitterContext context, int reg, bool top) + { + return context.VectorExtract16(GetVecA32(reg >> 2), ((reg & 3) << 1) | (top ? 1 : 0)); + } + + public static void InsertScalar16(ArmEmitterContext context, int reg, bool top, Operand value) + { + Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.I32); + + Operand vec, insert; + vec = GetVecA32(reg >> 2); + insert = context.VectorInsert16(vec, value, ((reg & 3) << 1) | (top ? 1 : 0)); + + context.Copy(vec, insert); + } + + public static Operand ExtractElement(ArmEmitterContext context, int reg, int size, bool signed) + { + return EmitVectorExtract32(context, reg >> (4 - size), reg & ((16 >> size) - 1), size, signed); + } + + public static void EmitVectorImmUnaryOp32(ArmEmitterContext context, Func1I emit) + { + IOpCode32SimdImm op = (IOpCode32SimdImm)context.CurrOp; + + Operand imm = Const(op.Immediate); + + int elems = op.Elems; + (int index, int subIndex) = GetQuadwordAndSubindex(op.Vd, op.RegisterSize); + + Operand vec = GetVecA32(index); + Operand res = vec; + + for (int item = 0; item < elems; item++) + { + res = EmitVectorInsert(context, res, emit(imm), item + subIndex * elems, op.Size); + } + + context.Copy(vec, res); + } + + public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Func1I emit) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(m)); + } + + public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(n, m)); + } + + public static void EmitScalarBinaryOpI32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? 
OperandType.I64 : OperandType.I32; + + if (op.Size < 2) + { + throw new NotSupportedException("Cannot perform a scalar SIMD operation on integers smaller than 32 bits."); + } + + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(n, m)); + } + + public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + OperandType type = (op.Size & 1) != 0 ? OperandType.FP64 : OperandType.FP32; + + Operand a = ExtractScalar(context, type, op.Vd); + Operand n = ExtractScalar(context, type, op.Vn); + Operand m = ExtractScalar(context, type, op.Vm); + + InsertScalar(context, op.Vd, emit(a, n, m)); + } + + public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Func1I emit) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> (sizeF + 2); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(ne, me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + Operand me = context.VectorExtract(type, GetVecA32(op.Qm), op.Fm + index); + + res = context.VectorInsert(res, emit(de, ne, me), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Integer + + public static void EmitVectorUnaryAccumulateOpI32(ArmEmitterContext context, Func1I emit, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, context.Add(de, emit(me)), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorUnaryOpI32(ArmEmitterContext context, Func1I emit, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryLongOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + if (op.Size == 2) + { + ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne); + me = signed ? 
context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me); + } + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorBinaryWideOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size + 1, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + if (op.Size == 2) + { + me = signed ? context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me); + } + + res = EmitVectorInsert(context, res, emit(ne, me), index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorImmBinaryQdQmOpZx32(ArmEmitterContext context, Func2I emit) + { + EmitVectorImmBinaryQdQmOpI32(context, emit, false); + } + + public static void EmitVectorImmBinaryQdQmOpSx32(ArmEmitterContext context, Func2I emit) + { + EmitVectorImmBinaryQdQmOpI32(context, emit, true); + } + + public static void EmitVectorImmBinaryQdQmOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryLongOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size + 1, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + if (op.Size == 2) + { + ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne); + me = signed ? 
context.SignExtend32(OperandType.I64, me) : context.ZeroExtend32(OperandType.I64, me); + } + + res = EmitVectorInsert(context, res, emit(de, ne, me), index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorTernaryOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, me), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit) + { + EmitVectorUnaryOpI32(context, emit, true); + } + + public static void EmitVectorUnaryOpSx32(ArmEmitterContext context, Func1I emit, bool accumulate) + { + if (accumulate) + { + EmitVectorUnaryAccumulateOpI32(context, emit, true); + } + else + { + EmitVectorUnaryOpI32(context, emit, true); + } + } + + public static void EmitVectorBinaryOpSx32(ArmEmitterContext context, Func2I emit) + { + EmitVectorBinaryOpI32(context, emit, true); + } + + public static void EmitVectorTernaryOpSx32(ArmEmitterContext context, Func3I emit) + { + EmitVectorTernaryOpI32(context, emit, true); + } + + public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit) + { + EmitVectorUnaryOpI32(context, emit, false); + } + + public static void EmitVectorUnaryOpZx32(ArmEmitterContext context, Func1I emit, bool accumulate) + { + if (accumulate) + { + EmitVectorUnaryAccumulateOpI32(context, emit, false); + } + else + { + EmitVectorUnaryOpI32(context, emit, false); + } + } + + public static void EmitVectorBinaryOpZx32(ArmEmitterContext context, Func2I emit) + { + EmitVectorBinaryOpI32(context, emit, false); + } + + public static void EmitVectorTernaryOpZx32(ArmEmitterContext context, Func3I emit) + { + EmitVectorTernaryOpI32(context, emit, false); + } + + // Vector by scalar + + public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand m = ExtractScalar(context, type, op.Vm); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + + res = context.VectorInsert(res, emit(ne, m), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorByScalarOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand m = ExtractElement(context, op.Vm, op.Size, signed); + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, m), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorByScalarLongOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand m = ExtractElement(context, op.Vm, op.Size, signed); + + if (op.Size == 2) + { + m = signed ? context.SignExtend32(OperandType.I64, m) : context.ZeroExtend32(OperandType.I64, m); + } + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + + if (op.Size == 2) + { + ne = signed ? context.SignExtend32(OperandType.I64, ne) : context.ZeroExtend32(OperandType.I64, ne); + } + + res = EmitVectorInsert(context, res, emit(ne, m), index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Func3I emit) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> sizeF + 2; + + Operand m = ExtractScalar(context, type, op.Vm); + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < elems; index++) + { + Operand de = context.VectorExtract(type, GetVecA32(op.Qd), op.Fd + index); + Operand ne = context.VectorExtract(type, GetVecA32(op.Qn), op.Fn + index); + + res = context.VectorInsert(res, emit(de, ne, m), op.Fd + index); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorsByScalarOpI32(ArmEmitterContext context, Func3I emit, bool signed) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand m = EmitVectorExtract32(context, op.Vm >> (4 - op.Size), op.Vm & ((1 << (4 - op.Size)) - 1), op.Size, signed); + + Operand res = GetVecA32(op.Qd); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand de = EmitVectorExtract32(context, op.Qd, op.Id + index, op.Size, signed); + Operand ne = EmitVectorExtract32(context, op.Qn, op.In + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(de, ne, m), op.Id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Pairwise + + public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Func2I emit) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int sizeF = op.Size & 1; + + OperandType type = sizeF != 0 ? 
OperandType.FP64 : OperandType.FP32; + + int elems = op.GetBytesCount() >> (sizeF + 2); + int pairs = elems >> 1; + + Operand res = GetVecA32(op.Qd); + Operand mvec = GetVecA32(op.Qm); + Operand nvec = GetVecA32(op.Qn); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand n1 = context.VectorExtract(type, nvec, op.Fn + pairIndex); + Operand n2 = context.VectorExtract(type, nvec, op.Fn + pairIndex + 1); + + res = context.VectorInsert(res, emit(n1, n2), op.Fd + index); + + Operand m1 = context.VectorExtract(type, mvec, op.Fm + pairIndex); + Operand m2 = context.VectorExtract(type, mvec, op.Fm + pairIndex + 1); + + res = context.VectorInsert(res, emit(m1, m2), op.Fd + index + pairs); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorPairwiseOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand n1 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex, op.Size, signed); + Operand n2 = EmitVectorExtract32(context, op.Qn, op.In + pairIndex + 1, op.Size, signed); + + Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed); + Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(n1, n2), op.Id + index, op.Size); + res = EmitVectorInsert(context, res, emit(m1, m2), op.Id + index + pairs, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void EmitVectorPairwiseLongOpI32(ArmEmitterContext context, Func2I emit, bool signed) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int elems = (op.Q ? 16 : 8) >> op.Size; + int pairs = elems >> 1; + int id = (op.Vd & 1) * pairs; + + Operand res = GetVecA32(op.Qd); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand m1 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex, op.Size, signed); + Operand m2 = EmitVectorExtract32(context, op.Qm, op.Im + pairIndex + 1, op.Size, signed); + + if (op.Size == 2) + { + m1 = signed ? context.SignExtend32(OperandType.I64, m1) : context.ZeroExtend32(OperandType.I64, m1); + m2 = signed ? context.SignExtend32(OperandType.I64, m2) : context.ZeroExtend32(OperandType.I64, m2); + } + + res = EmitVectorInsert(context, res, emit(m1, m2), id + index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Narrow + + public static void EmitVectorUnaryNarrowOp32(ArmEmitterContext context, Func1I emit, bool signed = false) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int elems = 8 >> op.Size; // Size contains the target element size. (for when it becomes a doubleword) + + Operand res = GetVecA32(op.Qd); + int id = (op.Vd & 1) << (3 - op.Size); // Target doubleword base. 
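+ // e.g. with op.Size == 1 an odd Vd gives id == 4, so the four narrowed halfwords land in the upper doubleword of the destination quadword.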
+ + for (int index = 0; index < elems; index++) + { + Operand m = EmitVectorExtract32(context, op.Qm, index, op.Size + 1, signed); + + res = EmitVectorInsert(context, res, emit(m), id + index, op.Size); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + // Intrinsic Helpers + + public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV) + { + Debug.Assert(input.Type == OperandType.V128); + + int originalSide = originalV & 1; + int targetSide = targetV & 1; + + if (originalSide == targetSide) + { + return input; + } + + if (targetSide == 1) + { + return context.AddIntrinsic(Intrinsic.X86Movlhps, input, input); // Low to high. + } + else + { + return context.AddIntrinsic(Intrinsic.X86Movhlps, input, input); // High to low. + } + } + + public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV) + { + Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128); + + int targetSide = targetV & 1; + int shuffleMask = 2; + + if (targetSide == 1) + { + return context.AddIntrinsic(Intrinsic.X86Shufpd, target, value, Const(shuffleMask)); + } + else + { + return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(shuffleMask)); + } + } + + public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth) + { + Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128); + + // Insert from index 0 in value to index in target. + int index = reg & (doubleWidth ? 1 : 3); + + if (doubleWidth) + { + if (index == 1) + { + return context.AddIntrinsic(Intrinsic.X86Movlhps, target, value); // Low to high. + } + else + { + return context.AddIntrinsic(Intrinsic.X86Shufpd, value, target, Const(2)); // Low to low, keep high from original. + } + } + else + { + if (Optimizations.UseSse41) + { + return context.AddIntrinsic(Intrinsic.X86Insertps, target, value, Const(index << 4)); + } + else + { + target = EmitSwapScalar(context, target, index, doubleWidth); // Swap value to replace into element 0. + target = context.AddIntrinsic(Intrinsic.X86Movss, target, value); // Move the value into element 0 of the vector. + return EmitSwapScalar(context, target, index, doubleWidth); // Swap new value back to the correct index. + } + } + } + + public static Operand EmitSwapScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth) + { + // Index into 0, 0 into index. This swap happens at the start of an A32 scalar op if required. + int index = reg & (doubleWidth ? 1 : 3); + if (index == 0) return target; + + if (doubleWidth) + { + int shuffleMask = 1; // Swap top and bottom. (b0 = 1, b1 = 0) + return context.AddIntrinsic(Intrinsic.X86Shufpd, target, target, Const(shuffleMask)); + } + else + { + int shuffleMask = (3 << 6) | (2 << 4) | (1 << 2) | index; // Swap index and 0. (others remain) + shuffleMask &= ~(3 << (index * 2)); + + return context.AddIntrinsic(Intrinsic.X86Shufps, target, target, Const(shuffleMask)); + } + } + + // Vector Operand Templates + + public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd); + } + + Operand res = vectorFunc(m); + + if (!op.Q) // Register insert. 
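+ // (A doubleword register aliases one half of a quadword register, so the result must be merged back into the destination quadword rather than overwriting it.)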
+ { + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m)); + } + + public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + + if (side == -1) + { + side = op.Vd; + } + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, side); + m = EmitMoveDoubleWordToSide(context, m, op.Vm, side); + } + + Operand res = vectorFunc(n, m); + + if (!op.Q) // Register insert. + { + if (side != op.Vd) + { + res = EmitMoveDoubleWordToSide(context, res, side, op.Vd); + } + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + Operand initialD = d; + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd); + m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd); + } + + Operand res = vectorFunc(d, n, m); + + if (!op.Q) // Register insert. + { + res = EmitDoubleWordInsert(context, initialD, res, op.Vd); + } + + context.Copy(initialD, res); + } + + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1; + Intrinsic inst2 = (op.Size & 1) != 0 ? inst64pt2 : inst32pt2; + + EmitVectorTernaryOpSimd32(context, (d, n, m) => + { + Operand res = context.AddIntrinsic(inst1, n, m); + return res = context.AddIntrinsic(inst2, d, res); + }); + } + + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst32) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Debug.Assert((op.Size & 1) == 0); + + EmitVectorTernaryOpSimd32(context, (d, n, m) => + { + return context.AddIntrinsic(inst32, d, n, m); + }); + } + + public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + + m = EmitSwapScalar(context, m, op.Vm, doubleSize); + + Operand res = scalarFunc(m); + + // Insert scalar into vector. 
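+ // A32 scalar registers alias the SIMD registers (four S or two D per Q), so the result
+ // computed at element 0 is written back at index Vd & 3 (Vd & 1 for doubles).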
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize); + + context.Copy(d, res); + } + + public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m)); + } + + public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vn >> shift); + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + + n = EmitSwapScalar(context, n, op.Vn, doubleSize); + m = EmitSwapScalar(context, m, op.Vm, doubleSize); + + Operand res = scalarFunc(n, m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, d, res, op.Vd, doubleSize); + + context.Copy(d, res); + } + + public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + + EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vn >> shift); + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + Operand initialD = d; + + n = EmitSwapScalar(context, n, op.Vn, doubleSize); + m = EmitSwapScalar(context, m, op.Vm, doubleSize); + d = EmitSwapScalar(context, d, op.Vd, doubleSize); + + Operand res = scalarFunc(d, n, m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize); + + context.Copy(initialD, res); + } + + public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + + Intrinsic inst = doubleSize ? inst64 : inst32; + + EmitScalarTernaryOpSimd32(context, (d, n, m) => + { + return context.AddIntrinsic(inst, d, n, m); + }); + } + + public static void EmitScalarTernaryOpF32( + ArmEmitterContext context, + Intrinsic inst32pt1, + Intrinsic inst64pt1, + Intrinsic inst32pt2, + Intrinsic inst64pt2, + bool isNegD = false) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + + Intrinsic inst1 = doubleSize ? inst64pt1 : inst32pt1; + Intrinsic inst2 = doubleSize ? inst64pt2 : inst32pt2; + + EmitScalarTernaryOpSimd32(context, (d, n, m) => + { + Operand res = context.AddIntrinsic(inst1, n, m); + + if (isNegD) + { + Operand mask = doubleSize + ? X86GetScalar(context, -0d) + : X86GetScalar(context, -0f); + + d = doubleSize + ? 
context.AddIntrinsic(Intrinsic.X86Xorpd, mask, d) + : context.AddIntrinsic(Intrinsic.X86Xorps, mask, d); + } + + return context.AddIntrinsic(inst2, d, res); + }); + } + + // By Scalar + + public static void EmitVectorByScalarOpSimd32(ArmEmitterContext context, Func2I vectorFunc) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand d = GetVecA32(op.Qd); + + int index = op.Vm & 3; + int dupeMask = (index << 6) | (index << 4) | (index << 2) | index; + Operand m = GetVecA32(op.Vm >> 2); + m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask)); + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd); + } + + Operand res = vectorFunc(n, m); + + if (!op.Q) // Register insert. + { + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorByScalarOpF32(ArmEmitterContext context, Intrinsic inst32, Intrinsic inst64) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Intrinsic inst = (op.Size & 1) != 0 ? inst64 : inst32; + EmitVectorByScalarOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorsByScalarOpSimd32(ArmEmitterContext context, Func3I vectorFunc) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand d = GetVecA32(op.Qd); + Operand initialD = d; + + int index = op.Vm & 3; + int dupeMask = (index << 6) | (index << 4) | (index << 2) | index; + Operand m = GetVecA32(op.Vm >> 2); + m = context.AddIntrinsic(Intrinsic.X86Shufps, m, m, Const(dupeMask)); + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd); + } + + Operand res = vectorFunc(d, n, m); + + if (!op.Q) // Register insert. + { + res = EmitDoubleWordInsert(context, initialD, res, op.Vd); + } + + context.Copy(initialD, res); + } + + public static void EmitVectorsByScalarOpF32(ArmEmitterContext context, Intrinsic inst32pt1, Intrinsic inst64pt1, Intrinsic inst32pt2, Intrinsic inst64pt2) + { + OpCode32SimdRegElem op = (OpCode32SimdRegElem)context.CurrOp; + + Intrinsic inst1 = (op.Size & 1) != 0 ? inst64pt1 : inst32pt1; + Intrinsic inst2 = (op.Size & 1) != 0 ? 
inst64pt2 : inst32pt2; + + EmitVectorsByScalarOpSimd32(context, (d, n, m) => + { + Operand res = context.AddIntrinsic(inst1, n, m); + return res = context.AddIntrinsic(inst2, d, res); + }); + } + + // Pairwise + + public static void EmitSse2VectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorBinaryOpSimd32(context, (n, m) => + { + Operand unpck = context.AddIntrinsic(Intrinsic.X86Unpcklps, n, m); + + Operand part0 = unpck; + Operand part1 = context.AddIntrinsic(Intrinsic.X86Movhlps, unpck, unpck); + + return context.AddIntrinsic(inst32, part0, part1); + }, 0); + } + + public static void EmitSsse3VectorPairwiseOp32(ArmEmitterContext context, Intrinsic[] inst) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + EmitVectorBinaryOpSimd32(context, (n, m) => + { + if (op.RegisterSize == RegisterSize.Simd64) + { + Operand zeroEvenMask = X86GetElements(context, ZeroMask, EvenMasks[op.Size]); + Operand zeroOddMask = X86GetElements(context, ZeroMask, OddMasks[op.Size]); + + Operand mN = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); // m:n + + Operand left = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroEvenMask); // 0:even from m:n + Operand right = context.AddIntrinsic(Intrinsic.X86Pshufb, mN, zeroOddMask); // 0:odd from m:n + + return context.AddIntrinsic(inst[op.Size], left, right); + } + else if (op.Size < 3) + { + Operand oddEvenMask = X86GetElements(context, OddMasks[op.Size], EvenMasks[op.Size]); + + Operand oddEvenN = context.AddIntrinsic(Intrinsic.X86Pshufb, n, oddEvenMask); // odd:even from n + Operand oddEvenM = context.AddIntrinsic(Intrinsic.X86Pshufb, m, oddEvenMask); // odd:even from m + + Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, oddEvenN, oddEvenM); + Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, oddEvenN, oddEvenM); + + return context.AddIntrinsic(inst[op.Size], left, right); + } + else + { + Operand left = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, n, m); + Operand right = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, n, m); + + return context.AddIntrinsic(inst[3], left, right); + } + }, 0); + } + + // Generic Functions + + public static Operand EmitSoftFloatCallDefaultFpscr(ArmEmitterContext context, string name, params Operand[] callArgs) + { + IOpCodeSimd op = (IOpCodeSimd)context.CurrOp; + + MethodInfo info = (op.Size & 1) == 0 + ? 
typeof(SoftFloat32).GetMethod(name) + : typeof(SoftFloat64).GetMethod(name); + + Array.Resize(ref callArgs, callArgs.Length + 1); + callArgs[callArgs.Length - 1] = Const(1); + + context.ExitArmFpMode(); + context.StoreToContext(); + Operand res = context.Call(info, callArgs); + context.LoadFromContext(); + context.EnterArmFpMode(); + + return res; + } + + public static Operand EmitVectorExtractSx32(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract32(context, reg, index, size, true); + } + + public static Operand EmitVectorExtractZx32(ArmEmitterContext context, int reg, int index, int size) + { + return EmitVectorExtract32(context, reg, index, size, false); + } + + public static Operand EmitVectorExtract32(ArmEmitterContext context, int reg, int index, int size, bool signed) + { + ThrowIfInvalid(index, size); + + Operand res = default; + + switch (size) + { + case 0: + res = context.VectorExtract8(GetVec(reg), index); + break; + + case 1: + res = context.VectorExtract16(GetVec(reg), index); + break; + + case 2: + res = context.VectorExtract(OperandType.I32, GetVec(reg), index); + break; + + case 3: + res = context.VectorExtract(OperandType.I64, GetVec(reg), index); + break; + } + + if (signed) + { + switch (size) + { + case 0: res = context.SignExtend8(OperandType.I32, res); break; + case 1: res = context.SignExtend16(OperandType.I32, res); break; + } + } + else + { + switch (size) + { + case 0: res = context.ZeroExtend8(OperandType.I32, res); break; + case 1: res = context.ZeroExtend16(OperandType.I32, res); break; + } + } + + return res; + } + + public static Operand EmitPolynomialMultiply(ArmEmitterContext context, Operand op1, Operand op2, int eSize) + { + Debug.Assert(eSize <= 32); + + Operand result = eSize == 32 ? 
Const(0L) : Const(0); + + if (eSize == 32) + { + op1 = context.ZeroExtend32(OperandType.I64, op1); + op2 = context.ZeroExtend32(OperandType.I64, op2); + } + + for (int i = 0; i < eSize; i++) + { + Operand mask = context.BitwiseAnd(op1, Const(op1.Type, 1L << i)); + + result = context.BitwiseExclusiveOr(result, context.Multiply(op2, mask)); + } + + return result; + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs new file mode 100644 index 00000000..98236be6 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdHelper32Arm64.cs @@ -0,0 +1,366 @@ + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func1I = Func<Operand, Operand>; + using Func2I = Func<Operand, Operand, Operand>; + using Func3I = Func<Operand, Operand, Operand, Operand>; + + static class InstEmitSimdHelper32Arm64 + { + // Intrinsic Helpers + + public static Operand EmitMoveDoubleWordToSide(ArmEmitterContext context, Operand input, int originalV, int targetV) + { + Debug.Assert(input.Type == OperandType.V128); + + int originalSide = originalV & 1; + int targetSide = targetV & 1; + + if (originalSide == targetSide) + { + return input; + } + + Intrinsic vType = Intrinsic.Arm64VDWord | Intrinsic.Arm64V128; + + if (targetSide == 1) + { + return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 0)); // Low to high. + } + else + { + return context.AddIntrinsic(Intrinsic.Arm64DupVe | vType, input, Const(OperandType.I32, 1)); // High to low. + } + } + + public static Operand EmitDoubleWordInsert(ArmEmitterContext context, Operand target, Operand value, int targetV) + { + Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128); + + int targetSide = targetV & 1; + Operand idx = Const(targetSide); + + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, idx, value, idx); + } + + public static Operand EmitScalarInsert(ArmEmitterContext context, Operand target, Operand value, int reg, bool doubleWidth) + { + Debug.Assert(target.Type == OperandType.V128 && value.Type == OperandType.V128); + + // Insert from index 0 in value to index in target. + int index = reg & (doubleWidth ? 1 : 3); + + if (doubleWidth) + { + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, target, Const(index), value, Const(0)); + } + else + { + return context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VWord, target, Const(index), value, Const(0)); + } + } + + public static Operand EmitExtractScalar(ArmEmitterContext context, Operand target, int reg, bool doubleWidth) + { + int index = reg & (doubleWidth ? 1 : 3); + if (index == 0) return target; // Element is already at index 0, so just return the vector directly. + + if (doubleWidth) + { + return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VDWord, target, Const(1)); // Extract high (index 1). + } + else + { + return context.AddIntrinsic(Intrinsic.Arm64DupSe | Intrinsic.Arm64VWord, target, Const(index)); // Extract element at index. 
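+ // Note: unlike the x86 path, which swaps the element to index 0 and back again
+ // (EmitSwapScalar), this helper only extracts; EmitScalarInsert later writes the
+ // result to the correct index with a single Arm64InsVe.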
+ } + } + + // Vector Operand Templates + + public static void EmitVectorUnaryOpSimd32(ArmEmitterContext context, Func1I vectorFunc) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd); + } + + Operand res = vectorFunc(m); + + if (!op.Q) // Register insert. + { + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorUnaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorUnaryOpSimd32(context, (m) => context.AddIntrinsic(inst, m)); + } + + public static void EmitVectorBinaryOpSimd32(ArmEmitterContext context, Func2I vectorFunc, int side = -1) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + + if (side == -1) + { + side = op.Vd; + } + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, side); + m = EmitMoveDoubleWordToSide(context, m, op.Vm, side); + } + + Operand res = vectorFunc(n, m); + + if (!op.Q) // Register insert. + { + if (side != op.Vd) + { + res = EmitMoveDoubleWordToSide(context, res, side, op.Vd); + } + res = EmitDoubleWordInsert(context, d, res, op.Vd); + } + + context.Copy(d, res); + } + + public static void EmitVectorBinaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorTernaryOpSimd32(ArmEmitterContext context, Func3I vectorFunc) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + Operand n = GetVecA32(op.Qn); + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + Operand initialD = d; + + if (!op.Q) // Register swap: move relevant doubleword to destination side. + { + n = EmitMoveDoubleWordToSide(context, n, op.Vn, op.Vd); + m = EmitMoveDoubleWordToSide(context, m, op.Vm, op.Vd); + } + + Operand res = vectorFunc(d, n, m); + + if (!op.Q) // Register insert. + { + res = EmitDoubleWordInsert(context, initialD, res, op.Vd); + } + + context.Copy(initialD, res); + } + + public static void EmitVectorTernaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitScalarUnaryOpSimd32(ArmEmitterContext context, Func1I scalarFunc) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand res = scalarFunc(m); + + // Insert scalar into vector. 
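+ // d still holds the full original register here; Arm64InsVe (via EmitScalarInsert)
+ // overwrites only the element selected by Vd and preserves its neighbours.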
+ res = EmitScalarInsert(context, d, res, op.Vd, doubleSize); + + context.Copy(d, res); + } + + public static void EmitScalarUnaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarUnaryOpSimd32(context, (m) => (inst == 0) ? m : context.AddIntrinsic(inst, m)); + } + + public static void EmitScalarBinaryOpSimd32(ArmEmitterContext context, Func2I scalarFunc) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vn >> shift); + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + + n = EmitExtractScalar(context, n, op.Vn, doubleSize); + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand res = scalarFunc(n, m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, d, res, op.Vd, doubleSize); + + context.Copy(d, res); + } + + public static void EmitScalarBinaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarTernaryOpSimd32(ArmEmitterContext context, Func3I scalarFunc) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vn >> shift); + Operand m = GetVecA32(op.Vm >> shift); + Operand d = GetVecA32(op.Vd >> shift); + Operand initialD = d; + + n = EmitExtractScalar(context, n, op.Vn, doubleSize); + m = EmitExtractScalar(context, m, op.Vm, doubleSize); + d = EmitExtractScalar(context, d, op.Vd, doubleSize); + + Operand res = scalarFunc(d, n, m); + + // Insert scalar into vector. + res = EmitScalarInsert(context, initialD, res, op.Vd, doubleSize); + + context.Copy(initialD, res); + } + + public static void EmitScalarTernaryOpF32(ArmEmitterContext context, Intrinsic inst) + { + OpCode32SimdRegS op = (OpCode32SimdRegS)context.CurrOp; + + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + EmitScalarTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(inst, d, n, m)); + } + + // Pairwise + + public static void EmitVectorPairwiseOpF32(ArmEmitterContext context, Intrinsic inst32) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + inst32 |= Intrinsic.Arm64V64 | Intrinsic.Arm64VFloat; + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(inst32, n, m), 0); + } + + public static void EmitVcmpOrVcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCode32SimdS op = (OpCode32SimdS)context.CurrOp; + + bool cmpWithZero = (op.Opc & 2) != 0; + + Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS; + inst |= ((op.Size & 1) != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + + bool doubleSize = (op.Size & 1) != 0; + int shift = doubleSize ? 1 : 2; + Operand n = GetVecA32(op.Vd >> shift); + Operand m = GetVecA32(op.Vm >> shift); + + n = EmitExtractScalar(context, n, op.Vd, doubleSize); + m = cmpWithZero ? 
Const(0) : EmitExtractScalar(context, m, op.Vm, doubleSize); + + Operand nzcv = context.AddIntrinsicInt(inst, n, m); + + Operand one = Const(1); + + SetFpFlag(context, FPState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one)); + SetFpFlag(context, FPState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one)); + SetFpFlag(context, FPState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one)); + SetFpFlag(context, FPState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one)); + } + + public static void EmitCmpOpF32(ArmEmitterContext context, CmpCondition cond, bool zero) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + int sizeF = op.Size & 1; + + Intrinsic inst; + if (zero) + { + inst = cond switch + { + CmpCondition.Equal => Intrinsic.Arm64FcmeqVz, + CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtVz, + CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeVz, + CmpCondition.LessThan => Intrinsic.Arm64FcmltVz, + CmpCondition.LessThanOrEqual => Intrinsic.Arm64FcmleVz, + _ => throw new InvalidOperationException() + }; + } + else { + inst = cond switch + { + CmpCondition.Equal => Intrinsic.Arm64FcmeqV, + CmpCondition.GreaterThan => Intrinsic.Arm64FcmgtV, + CmpCondition.GreaterThanOrEqual => Intrinsic.Arm64FcmgeV, + _ => throw new InvalidOperationException() + }; + } + + inst |= (sizeF != 0 ? Intrinsic.Arm64VDouble : Intrinsic.Arm64VFloat) | Intrinsic.Arm64V128; + + if (zero) + { + EmitVectorUnaryOpSimd32(context, (m) => + { + return context.AddIntrinsic(inst, m); + }); + } + else + { + EmitVectorBinaryOpSimd32(context, (n, m) => + { + return context.AddIntrinsic(inst, n, m); + }); + } + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs b/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs new file mode 100644 index 00000000..f0d242ae --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdHelperArm64.cs @@ -0,0 +1,720 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static class InstEmitSimdHelperArm64 + { + public static void EmitScalarUnaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarUnaryOpFFromGp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarUnaryOpFToGp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32 + ? context.AddIntrinsicInt (inst, n) + : context.AddIntrinsicLong(inst, n)); + } + + public static void EmitScalarBinaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitScalarTernaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + Operand a = GetVec(op.Ra); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, a, n, m)); + } + + public static void EmitScalarTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void EmitScalarUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitScalarBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + 
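+ // Integer scalar helpers encode the element size (0 = byte .. 3 = doubleword) into the
+ // intrinsic value via Arm64VSizeShift, rather than the Arm64VDouble flag used by the
+ // floating-point helpers above.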
OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitScalarBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitScalarTernaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitScalarShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift))); + } + + public static void EmitScalarShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + } + + public static void EmitScalarSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, n, m); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand result = context.AddIntrinsic(inst, d, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitScalarConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), 
context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitScalarConvertBinaryOpFFromGp(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitScalarConvertBinaryOpFToGp(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + SetIntOrZR(context, op.Rd, op.RegisterSize == RegisterSize.Int32 + ? context.AddIntrinsicInt (inst, n, Const(fBits)) + : context.AddIntrinsicLong(inst, n, Const(fBits))); + } + + public static void EmitVectorUnaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitVectorBinaryOpF(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorBinaryOpFRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitVectorBinaryOpFByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitVectorTernaryOpFRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitVectorTernaryOpFRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElemF op = (OpCodeSimdRegElemF)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void 
EmitVectorUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n)); + } + + public static void EmitVectorBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m)); + } + + public static void EmitVectorBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n)); + } + + public static void EmitVectorBinaryOpByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, m, Const(op.Index))); + } + + public static void EmitVectorTernaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m)); + } + + public static void EmitVectorTernaryOpRdByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(d, context.AddIntrinsic(inst, d, n, m, Const(op.Index))); + } + + public static void EmitVectorShiftBinaryOp(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(shift))); + } + + public static void EmitVectorShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + } + + public static void 
EmitVectorSaturatingShiftTernaryOpRd(ArmEmitterContext context, Intrinsic inst, int shift) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, d, n, Const(shift))); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingUnaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOp(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n, m); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOpRd(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, d, n); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorSaturatingBinaryOpByElem(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdRegElem op = (OpCodeSimdRegElem)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + inst |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + Operand result = context.AddIntrinsic(inst, n, m, Const(op.Index)); + + context.Copy(GetVec(op.Rd), result); + + context.SetPendingQcFlagSync(); + } + + public static void EmitVectorConvertBinaryOpF(ArmEmitterContext context, Intrinsic inst, int fBits) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, n, Const(fBits))); + } + + public static void EmitVectorLookupTable(ArmEmitterContext context, Intrinsic inst) + { + OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp; + + Operand[] operands = new Operand[op.Size + 1]; + + operands[op.Size] = GetVec(op.Rm); + + for (int index = 0; index < op.Size; index++) + { + operands[index] = GetVec((op.Rn + index) & 0x1F); + } + + if (op.RegisterSize == RegisterSize.Simd128) + { + inst |= Intrinsic.Arm64V128; + } + + context.Copy(GetVec(op.Rd), context.AddIntrinsic(inst, operands)); + } + + public static void EmitFcmpOrFcmpe(ArmEmitterContext context, bool signalNaNs) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + 
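+ // For plain FCMP/FCMPE, opcode bit 3 selects the compare-against-#0.0 form; the
+ // conditional compare form (OpCodeSimdFcond) always compares two registers.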
bool cmpWithZero = !(op is OpCodeSimdFcond) ? op.Bit3 : false; + + Intrinsic inst = signalNaNs ? Intrinsic.Arm64FcmpeS : Intrinsic.Arm64FcmpS; + + if ((op.Size & 1) != 0) + { + inst |= Intrinsic.Arm64VDouble; + } + + Operand n = GetVec(op.Rn); + Operand m = cmpWithZero ? Const(0) : GetVec(op.Rm); + + Operand nzcv = context.AddIntrinsicInt(inst, n, m); + + Operand one = Const(1); + + SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(28)), one)); + SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(29)), one)); + SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(30)), one)); + SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const(31)), one)); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs b/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs new file mode 100644 index 00000000..2bf531e6 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdLogical.cs @@ -0,0 +1,612 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void And_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64AndV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseAnd(op1, op2)); + } + } + + public static void Bic_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64BicV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.BitwiseAnd(op1, context.BitwiseNot(op2)); + }); + } + } + + public static void Bic_Vi(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + int eSize = 8 << op.Size; + + Operand d = GetVec(op.Rd); + Operand imm = eSize switch { + 16 => X86GetAllElements(context, (short)~op.Immediate), + 32 => X86GetAllElements(context, (int)~op.Immediate), + _ => throw new InvalidOperationException($"Invalid element size {eSize}.") + }; + + Operand res = context.AddIntrinsic(Intrinsic.X86Pand, d, imm); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorImmBinaryOp(context, (op1, op2) => + { + return context.BitwiseAnd(op1, context.BitwiseNot(op2)); + }); + } + } + + public static void Bif_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BifV); + } + else + { + EmitBifBit(context, notRm: true); + } + } + + public static void Bit_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BitV); + } + else + { + EmitBifBit(context, notRm: false); + } + } + + private static void EmitBifBit(ArmEmitterContext context, bool notRm) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d); + + if 
(notRm) + { + res = context.AddIntrinsic(Intrinsic.X86Pandn, m, res); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Pand, m, res); + } + + res = context.AddIntrinsic(Intrinsic.X86Pxor, d, res); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 2 : 1; + + for (int index = 0; index < elems; index++) + { + Operand d = EmitVectorExtractZx(context, op.Rd, index, 3); + Operand n = EmitVectorExtractZx(context, op.Rn, index, 3); + Operand m = EmitVectorExtractZx(context, op.Rm, index, 3); + + if (notRm) + { + m = context.BitwiseNot(m); + } + + Operand e = context.BitwiseExclusiveOr(d, n); + + e = context.BitwiseAnd(e, m); + e = context.BitwiseExclusiveOr(e, d); + + res = EmitVectorInsert(context, res, e, index, 3); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Bsl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorTernaryOpRd(context, Intrinsic.Arm64BslV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + res = context.AddIntrinsic(Intrinsic.X86Pand, res, d); + res = context.AddIntrinsic(Intrinsic.X86Pxor, res, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorTernaryOpZx(context, (op1, op2, op3) => + { + return context.BitwiseExclusiveOr( + context.BitwiseAnd(op1, + context.BitwiseExclusiveOr(op2, op3)), op3); + }); + } + } + + public static void Eor_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64EorV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2)); + } + } + + public static void Not_V(ArmEmitterContext context) + { + if (Optimizations.UseAvx512Ortho) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, n, Const(~0b10101010)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else if (Optimizations.UseSse2) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask = X86GetAllElements(context, -1L); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpZx(context, (op1) => context.BitwiseNot(op1)); + } + } + + public static void Orn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrnV); + } + else if (Optimizations.UseAvx512Ortho) + { + OpCodeSimdReg op 
= (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand mask = X86GetAllElements(context, -1L); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, n); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => + { + return context.BitwiseOr(op1, context.BitwiseNot(op2)); + }); + } + } + + public static void Orr_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64OrrV); + } + else if (Optimizations.UseSse2) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorBinaryOpZx(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + } + + public static void Orr_Vi(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + int eSize = 8 << op.Size; + + Operand d = GetVec(op.Rd); + Operand imm = eSize switch { + 16 => X86GetAllElements(context, (short)op.Immediate), + 32 => X86GetAllElements(context, (int)op.Immediate), + _ => throw new InvalidOperationException($"Invalid element size {eSize}.") + }; + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, d, imm); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorImmBinaryOp(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + } + + public static void Rbit_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (Optimizations.UseGfni) + { + const long bitMatrix = + (0b10000000L << 56) | + (0b01000000L << 48) | + (0b00100000L << 40) | + (0b00010000L << 32) | + (0b00001000L << 24) | + (0b00000100L << 16) | + (0b00000010L << 8) | + (0b00000001L << 0); + + Operand vBitMatrix = X86GetAllElements(context, bitMatrix); + + Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, GetVec(op.Rn), vBitMatrix, Const(0)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + int elems = op.RegisterSize == RegisterSize.Simd128 ? 
16 : 8; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, 0); + + Operand de = EmitReverseBits8Op(context, ne); + + res = EmitVectorInsert(context, res, de, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static Operand EmitReverseBits8Op(ArmEmitterContext context, Operand op) + { + Debug.Assert(op.Type == OperandType.I64); + + Operand val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(op, Const(0xaaul)), Const(1)), + context.ShiftLeft (context.BitwiseAnd(op, Const(0x55ul)), Const(1))); + + val = context.BitwiseOr(context.ShiftRightUI(context.BitwiseAnd(val, Const(0xccul)), Const(2)), + context.ShiftLeft (context.BitwiseAnd(val, Const(0x33ul)), Const(2))); + + return context.BitwiseOr(context.ShiftRightUI(val, Const(4)), + context.ShiftLeft (context.BitwiseAnd(val, Const(0x0ful)), Const(4))); + } + + public static void Rev16_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + const long maskE0 = 06L << 56 | 07L << 48 | 04L << 40 | 05L << 32 | 02L << 24 | 03L << 16 | 00L << 8 | 01L << 0; + const long maskE1 = 14L << 56 | 15L << 48 | 12L << 40 | 13L << 32 | 10L << 24 | 11L << 16 | 08L << 8 | 09L << 0; + + Operand mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 1); + } + } + + public static void Rev32_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask; + + if (op.Size == 0) + { + const long maskE0 = 04L << 56 | 05L << 48 | 06L << 40 | 07L << 32 | 00L << 24 | 01L << 16 | 02L << 8 | 03L << 0; + const long maskE1 = 12L << 56 | 13L << 48 | 14L << 40 | 15L << 32 | 08L << 24 | 09L << 16 | 10L << 8 | 11L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else /* if (op.Size == 1) */ + { + const long maskE0 = 05L << 56 | 04L << 48 | 07L << 40 | 06L << 32 | 01L << 24 | 00L << 16 | 03L << 8 | 02L << 0; + const long maskE1 = 13L << 56 | 12L << 48 | 15L << 40 | 14L << 32 | 09L << 24 | 08L << 16 | 11L << 8 | 10L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 2); + } + } + + public static void Rev64_V(ArmEmitterContext context) + { + if (Optimizations.UseSsse3) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetVec(op.Rn); + + Operand mask; + + if (op.Size == 0) + { + const long maskE0 = 00L << 56 | 01L << 48 | 02L << 40 | 03L << 32 | 04L << 24 | 05L << 16 | 06L << 8 | 07L << 0; + const long maskE1 = 08L << 56 | 09L << 48 | 10L << 40 | 11L << 32 | 12L << 24 | 13L << 16 | 14L << 8 | 15L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else if (op.Size == 1) + { + const long maskE0 = 01L << 56 | 00L << 48 | 03L << 40 | 02L << 32 | 05L 
<< 24 | 04L << 16 | 07L << 8 | 06L << 0; + const long maskE1 = 09L << 56 | 08L << 48 | 11L << 40 | 10L << 32 | 13L << 24 | 12L << 16 | 15L << 8 | 14L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + else /* if (op.Size == 2) */ + { + const long maskE0 = 03L << 56 | 02L << 48 | 01L << 40 | 00L << 32 | 07L << 24 | 06L << 16 | 05L << 8 | 04L << 0; + const long maskE1 = 11L << 56 | 10L << 48 | 09L << 40 | 08L << 32 | 15L << 24 | 14L << 16 | 13L << 8 | 12L << 0; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitRev_V(context, containerSize: 3); + } + } + + private static void EmitRev_V(ArmEmitterContext context, int containerSize) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + int containerMask = (1 << (containerSize - op.Size)) - 1; + + for (int index = 0; index < elems; index++) + { + int revIndex = index ^ containerMask; + + Operand ne = EmitVectorExtractZx(context, op.Rn, revIndex, op.Size); + + res = EmitVectorInsert(context, res, ne, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs b/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs new file mode 100644 index 00000000..68ef4ed1 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdLogical32.cs @@ -0,0 +1,266 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vand_I(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64AndV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) + { + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pand, n, m)); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, op2)); + } + } + + public static void Vbic_I(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64BicV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) + { + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pandn, m, n)); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseAnd(op1, context.BitwiseNot(op2))); + } + } + + public static void Vbic_II(ArmEmitterContext context) + { + OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp; + + long immediate = op.Immediate; + + // Replicate fields to fill the 64-bits, if size is < 64-bits. 
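+ // A worked example of the multiply trick below, assuming Size == 0: an 8-bit + // immediate 0xAB becomes 0xAB * 0x0101010101010101 = 0xABABABABABABABAB, the + // byte broadcast into every lane of the doubleword; sizes 1 and 2 broadcast + // 16- and 32-bit fields the same way.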
+ switch (op.Size) + { + case 0: immediate *= 0x0101010101010101L; break; + case 1: immediate *= 0x0001000100010001L; break; + case 2: immediate *= 0x0000000100000001L; break; + } + + Operand imm = Const(immediate); + Operand res = GetVecA32(op.Qd); + + if (op.Q) + { + for (int elem = 0; elem < 2; elem++) + { + Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3); + + res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), elem, 3); + } + } + else + { + Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3); + + res = EmitVectorInsert(context, res, context.BitwiseAnd(de, context.BitwiseNot(imm)), op.Vd & 1, 3); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vbif(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BifV | Intrinsic.Arm64V128, d, n, m)); + } + else + { + EmitBifBit(context, true); + } + } + + public static void Vbit(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BitV | Intrinsic.Arm64V128, d, n, m)); + } + else + { + EmitBifBit(context, false); + } + } + + public static void Vbsl(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorTernaryOpSimd32(context, (d, n, m) => context.AddIntrinsic(Intrinsic.Arm64BslV | Intrinsic.Arm64V128, d, n, m)); + } + else if (Optimizations.UseSse2) + { + EmitVectorTernaryOpSimd32(context, (d, n, m) => + { + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, m); + res = context.AddIntrinsic(Intrinsic.X86Pand, res, d); + return context.AddIntrinsic(Intrinsic.X86Pxor, res, m); + }); + } + else + { + EmitVectorTernaryOpZx32(context, (op1, op2, op3) => + { + return context.BitwiseExclusiveOr( + context.BitwiseAnd(op1, + context.BitwiseExclusiveOr(op2, op3)), op3); + }); + } + } + + public static void Veor_I(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64EorV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) + { + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Pxor, n, m)); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseExclusiveOr(op1, op2)); + } + } + + public static void Vorn_I(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrnV | Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseAvx512Ortho) + { + EmitVectorBinaryOpSimd32(context, (n, m) => + { + return context.AddIntrinsic(Intrinsic.X86Vpternlogd, n, m, Const(0b11001100 | ~0b10101010)); + }); + } + else if (Optimizations.UseSse2) + { + Operand mask = context.VectorOne(); + + EmitVectorBinaryOpSimd32(context, (n, m) => + { + m = context.AddIntrinsic(Intrinsic.X86Pandn, m, mask); + return context.AddIntrinsic(Intrinsic.X86Por, n, m); + }); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, context.BitwiseNot(op2))); + } + } + + public static void Vorr_I(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelper32Arm64.EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.Arm64OrrV | 
Intrinsic.Arm64V128, n, m)); + } + else if (Optimizations.UseSse2) + { + EmitVectorBinaryOpSimd32(context, (n, m) => context.AddIntrinsic(Intrinsic.X86Por, n, m)); + } + else + { + EmitVectorBinaryOpZx32(context, (op1, op2) => context.BitwiseOr(op1, op2)); + } + } + + public static void Vorr_II(ArmEmitterContext context) + { + OpCode32SimdImm op = (OpCode32SimdImm)context.CurrOp; + + long immediate = op.Immediate; + + // Replicate fields to fill the 64-bits, if size is < 64-bits. + switch (op.Size) + { + case 0: immediate *= 0x0101010101010101L; break; + case 1: immediate *= 0x0001000100010001L; break; + case 2: immediate *= 0x0000000100000001L; break; + } + + Operand imm = Const(immediate); + Operand res = GetVecA32(op.Qd); + + if (op.Q) + { + for (int elem = 0; elem < 2; elem++) + { + Operand de = EmitVectorExtractZx(context, op.Qd, elem, 3); + + res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), elem, 3); + } + } + else + { + Operand de = EmitVectorExtractZx(context, op.Qd, op.Vd & 1, 3); + + res = EmitVectorInsert(context, res, context.BitwiseOr(de, imm), op.Vd & 1, 3); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vtst(ArmEmitterContext context) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => + { + Operand isZero = context.ICompareEqual(context.BitwiseAnd(op1, op2), Const(0)); + return context.ConditionalSelect(isZero, Const(0), Const(-1)); + }); + } + + private static void EmitBifBit(ArmEmitterContext context, bool notRm) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (Optimizations.UseSse2) + { + EmitVectorTernaryOpSimd32(context, (d, n, m) => + { + Operand res = context.AddIntrinsic(Intrinsic.X86Pxor, n, d); + res = context.AddIntrinsic((notRm) ? Intrinsic.X86Pandn : Intrinsic.X86Pand, m, res); + return context.AddIntrinsic(Intrinsic.X86Pxor, d, res); + }); + } + else + { + EmitVectorTernaryOpZx32(context, (d, n, m) => + { + if (notRm) + { + m = context.BitwiseNot(m); + } + return context.BitwiseExclusiveOr( + context.BitwiseAnd(m, + context.BitwiseExclusiveOr(d, n)), d); + }); + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs b/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs new file mode 100644 index 00000000..9b19872a --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdMemory.cs @@ -0,0 +1,160 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System.Diagnostics; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + public static void Ld__Vms(ArmEmitterContext context) + { + EmitSimdMemMs(context, isLoad: true); + } + + public static void Ld__Vss(ArmEmitterContext context) + { + EmitSimdMemSs(context, isLoad: true); + } + + public static void St__Vms(ArmEmitterContext context) + { + EmitSimdMemMs(context, isLoad: false); + } + + public static void St__Vss(ArmEmitterContext context) + { + EmitSimdMemSs(context, isLoad: false); + } + + private static void EmitSimdMemMs(ArmEmitterContext context, bool isLoad) + { + OpCodeSimdMemMs op = (OpCodeSimdMemMs)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + + long offset = 0; + + for (int rep = 0; rep < op.Reps; rep++) + for (int elem = 0; elem < op.Elems; elem++) + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int 
rtt = (op.Rt + rep + sElem) & 0x1f; + + Operand tt = GetVec(rtt); + + Operand address = context.Add(n, Const(offset)); + + if (isLoad) + { + EmitLoadSimd(context, address, tt, rtt, elem, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64 && elem == op.Elems - 1) + { + context.Copy(tt, context.VectorZeroUpper64(tt)); + } + } + else + { + EmitStoreSimd(context, address, rtt, elem, op.Size); + } + + offset += 1 << op.Size; + } + + if (op.WBack) + { + EmitSimdMemWBack(context, offset); + } + } + + private static void EmitSimdMemSs(ArmEmitterContext context, bool isLoad) + { + OpCodeSimdMemSs op = (OpCodeSimdMemSs)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + + long offset = 0; + + if (op.Replicate) + { + // Only loads use the replicate mode. + Debug.Assert(isLoad, "Replicate mode is not valid for stores."); + + int elems = op.GetBytesCount() >> op.Size; + + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rt = (op.Rt + sElem) & 0x1f; + + Operand t = GetVec(rt); + + Operand address = context.Add(n, Const(offset)); + + for (int index = 0; index < elems; index++) + { + EmitLoadSimd(context, address, t, rt, index, op.Size); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + context.Copy(t, context.VectorZeroUpper64(t)); + } + + offset += 1 << op.Size; + } + } + else + { + for (int sElem = 0; sElem < op.SElems; sElem++) + { + int rt = (op.Rt + sElem) & 0x1f; + + Operand t = GetVec(rt); + + Operand address = context.Add(n, Const(offset)); + + if (isLoad) + { + EmitLoadSimd(context, address, t, rt, op.Index, op.Size); + } + else + { + EmitStoreSimd(context, address, rt, op.Index, op.Size); + } + + offset += 1 << op.Size; + } + } + + if (op.WBack) + { + EmitSimdMemWBack(context, offset); + } + } + + private static void EmitSimdMemWBack(ArmEmitterContext context, long offset) + { + OpCodeMemReg op = (OpCodeMemReg)context.CurrOp; + + Operand n = GetIntOrSP(context, op.Rn); + Operand m; + + if (op.Rm != RegisterAlias.Zr) + { + m = GetIntOrZR(context, op.Rm); + } + else + { + m = Const(offset); + } + + context.Copy(n, context.Add(n, m)); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs new file mode 100644 index 00000000..b774bd06 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdMemory32.cs @@ -0,0 +1,352 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitMemoryHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vld1(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 1, true); + } + + public static void Vld2(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 2, true); + } + + public static void Vld3(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 3, true); + } + + public static void Vld4(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 4, true); + } + + public static void Vst1(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 1, false); + } + + public static void Vst2(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 2, false); + } + + public static void Vst3(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 3, false); + } + + public static void Vst4(ArmEmitterContext context) + { + EmitVStoreOrLoadN(context, 4, false); + } + + public static void EmitVStoreOrLoadN(ArmEmitterContext context, int count, bool load) + { + if (context.CurrOp is OpCode32SimdMemSingle) + { + OpCode32SimdMemSingle op = (OpCode32SimdMemSingle)context.CurrOp; + + int eBytes = 1 << op.Size; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + // TODO: Check alignment. + int offset = 0; + int d = op.Vd; + + for (int i = 0; i < count; i++) + { + // Accesses an element from a double simd register. + Operand address = context.Add(n, Const(offset)); + if (eBytes == 8) + { + if (load) + { + EmitDVectorLoad(context, address, d); + } + else + { + EmitDVectorStore(context, address, d); + } + } + else + { + int index = ((d & 1) << (3 - op.Size)) + op.Index; + if (load) + { + if (op.Replicate) + { + var regs = (count > 1) ? 1 : op.Increment; + for (int reg = 0; reg < regs; reg++) + { + int dreg = reg + d; + int rIndex = ((dreg & 1) << (3 - op.Size)); + int limit = rIndex + (1 << (3 - op.Size)); + + while (rIndex < limit) + { + EmitLoadSimd(context, address, GetVecA32(dreg >> 1), dreg >> 1, rIndex++, op.Size); + } + } + } + else + { + EmitLoadSimd(context, address, GetVecA32(d >> 1), d >> 1, index, op.Size); + } + } + else + { + EmitStoreSimd(context, address, d >> 1, index, op.Size); + } + } + offset += eBytes; + d += op.Increment; + } + + if (op.WBack) + { + if (op.RegisterIndex) + { + Operand m = GetIntA32(context, op.Rm); + SetIntA32(context, op.Rn, context.Add(n, m)); + } + else + { + SetIntA32(context, op.Rn, context.Add(n, Const(count * eBytes))); + } + } + } + else + { + OpCode32SimdMemPair op = (OpCode32SimdMemPair)context.CurrOp; + + int increment = count > 1 ? op.Increment : 1; + int eBytes = 1 << op.Size; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + int offset = 0; + int d = op.Vd; + + for (int reg = 0; reg < op.Regs; reg++) + { + for (int elem = 0; elem < op.Elems; elem++) + { + int elemD = d + reg; + for (int i = 0; i < count; i++) + { + // Accesses an element from a double simd register, + // add ebytes for each element. 
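+ // Note: elemD >> 1 selects the backing 128-bit register, and the low bit of + // elemD picks its lower or upper doubleword, which is why the lane index + // below is rebased by (elemD & 1) << (3 - op.Size).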
+ Operand address = context.Add(n, Const(offset)); + int index = ((elemD & 1) << (3 - op.Size)) + elem; + if (eBytes == 8) + { + if (load) + { + EmitDVectorLoad(context, address, elemD); + } + else + { + EmitDVectorStore(context, address, elemD); + } + } + else + { + if (load) + { + EmitLoadSimd(context, address, GetVecA32(elemD >> 1), elemD >> 1, index, op.Size); + } + else + { + EmitStoreSimd(context, address, elemD >> 1, index, op.Size); + } + } + + offset += eBytes; + elemD += increment; + } + } + } + + if (op.WBack) + { + if (op.RegisterIndex) + { + Operand m = GetIntA32(context, op.Rm); + SetIntA32(context, op.Rn, context.Add(n, m)); + } + else + { + SetIntA32(context, op.Rn, context.Add(n, Const(count * 8 * op.Regs))); + } + } + } + } + + public static void Vldm(ArmEmitterContext context) + { + OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writeBack = op.PostOffset != 0; + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int range = op.RegisterRange; + + int sReg = (op.DoubleWidth) ? (op.Vd << 1) : op.Vd; + int offset = 0; + int byteSize = 4; + + for (int num = 0; num < range; num++, sReg++) + { + Operand address = context.Add(baseAddress, Const(offset)); + Operand vec = GetVecA32(sReg >> 2); + + EmitLoadSimd(context, address, vec, sReg >> 2, sReg & 3, WordSizeLog2); + offset += byteSize; + } + } + + public static void Vstm(ArmEmitterContext context) + { + OpCode32SimdMemMult op = (OpCode32SimdMemMult)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + + Operand baseAddress = context.Add(n, Const(op.Offset)); + + bool writeBack = op.PostOffset != 0; + + if (writeBack) + { + SetIntA32(context, op.Rn, context.Add(n, Const(op.PostOffset))); + } + + int offset = 0; + + int range = op.RegisterRange; + int sReg = (op.DoubleWidth) ? 
(op.Vd << 1) : op.Vd; + int byteSize = 4; + + for (int num = 0; num < range; num++, sReg++) + { + Operand address = context.Add(baseAddress, Const(offset)); + + EmitStoreSimd(context, address, sReg >> 2, sReg & 3, WordSizeLog2); + + offset += byteSize; + } + } + + public static void Vldr(ArmEmitterContext context) + { + EmitVLoadOrStore(context, AccessType.Load); + } + + public static void Vstr(ArmEmitterContext context) + { + EmitVLoadOrStore(context, AccessType.Store); + } + + private static void EmitDVectorStore(ArmEmitterContext context, Operand address, int vecD) + { + int vecQ = vecD >> 1; + int vecSElem = (vecD & 1) << 1; + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + EmitStoreSimd(context, address, vecQ, vecSElem, WordSizeLog2); + EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem | 1, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + EmitStoreSimd(context, address, vecQ, vecSElem | 1, WordSizeLog2); + EmitStoreSimd(context, context.Add(address, Const(4)), vecQ, vecSElem, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + + private static void EmitDVectorLoad(ArmEmitterContext context, Operand address, int vecD) + { + int vecQ = vecD >> 1; + int vecSElem = (vecD & 1) << 1; + Operand vec = GetVecA32(vecQ); + + Operand lblBigEndian = Label(); + Operand lblEnd = Label(); + + context.BranchIfTrue(lblBigEndian, GetFlag(PState.EFlag)); + + EmitLoadSimd(context, address, vec, vecQ, vecSElem, WordSizeLog2); + EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem | 1, WordSizeLog2); + + context.Branch(lblEnd); + + context.MarkLabel(lblBigEndian); + + EmitLoadSimd(context, address, vec, vecQ, vecSElem | 1, WordSizeLog2); + EmitLoadSimd(context, context.Add(address, Const(4)), vec, vecQ, vecSElem, WordSizeLog2); + + context.MarkLabel(lblEnd); + } + + private static void EmitVLoadOrStore(ArmEmitterContext context, AccessType accType) + { + OpCode32SimdMemImm op = (OpCode32SimdMemImm)context.CurrOp; + + Operand n = context.Copy(GetIntA32(context, op.Rn)); + Operand m = GetMemM(context, setCarry: false); + + Operand address = op.Add + ? 
context.Add(n, m) + : context.Subtract(n, m); + + int size = op.Size; + + if ((accType & AccessType.Load) != 0) + { + if (size == DWordSizeLog2) + { + EmitDVectorLoad(context, address, op.Vd); + } + else + { + Operand vec = GetVecA32(op.Vd >> 2); + EmitLoadSimd(context, address, vec, op.Vd >> 2, (op.Vd & 3) << (2 - size), size); + } + } + else + { + if (size == DWordSizeLog2) + { + EmitDVectorStore(context, address, op.Vd); + } + else + { + EmitStoreSimd(context, address, op.Vd >> 2, (op.Vd & 3) << (2 - size), size); + } + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove.cs new file mode 100644 index 00000000..b58a32f6 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdMove.cs @@ -0,0 +1,850 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System.Collections.Generic; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { +#region "Masks" + private static readonly long[] _masksE0_Uzp = new long[] + { + 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_Uzp = new long[] + { + 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0 + }; +#endregion + + public static void Dup_Gp(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + if (Optimizations.UseSse2) + { + switch (op.Size) + { + case 0: n = context.ZeroExtend8 (n.Type, n); n = context.Multiply(n, Const(n.Type, 0x01010101)); break; + case 1: n = context.ZeroExtend16(n.Type, n); n = context.Multiply(n, Const(n.Type, 0x00010001)); break; + case 2: n = context.ZeroExtend32(n.Type, n); break; + } + + Operand res = context.VectorInsert(context.VectorZero(), n, 0); + + if (op.Size < 3) + { + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0xf0)); + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + } + else + { + res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, n, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Dup_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), ne, 0, op.Size)); + } + + public static void Dup_V(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand res = GetVec(op.Rn); + + if (op.Size == 0) + { + if (op.DstIndex != 0) + { + res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex)); + } + + res = 
context.AddIntrinsic(Intrinsic.X86Punpcklbw, res, res); + res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res); + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + else if (op.Size == 1) + { + if (op.DstIndex != 0) + { + res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(op.DstIndex * 2)); + } + + res = context.AddIntrinsic(Intrinsic.X86Punpcklwd, res, res); + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(0)); + } + else if (op.Size == 2) + { + int mask = op.DstIndex * 0b01010101; + + res = context.AddIntrinsic(Intrinsic.X86Shufps, res, res, Const(mask)); + } + else if (op.DstIndex == 0 && op.RegisterSize != RegisterSize.Simd64) + { + res = context.AddIntrinsic(Intrinsic.X86Movlhps, res, res); + } + else if (op.DstIndex == 1) + { + res = context.AddIntrinsic(Intrinsic.X86Movhlps, res, res); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, ne, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Ext_V(ArmEmitterContext context) + { + OpCodeSimdExt op = (OpCodeSimdExt)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand nShifted = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd64) + { + nShifted = context.VectorZeroUpper64(nShifted); + } + + nShifted = context.AddIntrinsic(Intrinsic.X86Psrldq, nShifted, Const(op.Imm4)); + + Operand mShifted = GetVec(op.Rm); + + mShifted = context.AddIntrinsic(Intrinsic.X86Pslldq, mShifted, Const(op.GetBytesCount() - op.Imm4)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + mShifted = context.VectorZeroUpper64(mShifted); + } + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, mShifted); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int bytes = op.GetBytesCount(); + + int position = op.Imm4 & (bytes - 1); + + for (int index = 0; index < bytes; index++) + { + int reg = op.Imm4 + index < bytes ? op.Rn : op.Rm; + + Operand e = EmitVectorExtractZx(context, reg, position, 0); + + position = (position + 1) & (bytes - 1); + + res = EmitVectorInsert(context, res, e, index, 0); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fcsel_S(ArmEmitterContext context) + { + OpCodeSimdFcond op = (OpCodeSimdFcond)context.CurrOp; + + Operand lblTrue = Label(); + Operand lblEnd = Label(); + + Operand isTrue = InstEmitFlowHelper.GetCondTrue(context, op.Cond); + + context.BranchIfTrue(lblTrue, isTrue); + + OperandType type = op.Size == 0 ? 
OperandType.FP32 : OperandType.FP64; + + Operand me = context.VectorExtract(type, GetVec(op.Rm), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), me, 0)); + + context.Branch(lblEnd); + + context.MarkLabel(lblTrue); + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0)); + + context.MarkLabel(lblEnd); + } + + public static void Fmov_Ftoi(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, 0, op.Size + 2); + + SetIntOrZR(context, op.Rd, ne); + } + + public static void Fmov_Ftoi1(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, 1, 3); + + SetIntOrZR(context, op.Rd, ne); + } + + public static void Fmov_Itof(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand n = GetIntOrZR(context, op.Rn); + + context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), n, 0, op.Size + 2)); + } + + public static void Fmov_Itof1(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetIntOrZR(context, op.Rn); + + context.Copy(d, EmitVectorInsert(context, d, n, 1, 3)); + } + + public static void Fmov_S(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64; + + Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0); + + context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), ne, 0)); + } + + public static void Fmov_Si(ArmEmitterContext context) + { + OpCodeSimdFmov op = (OpCodeSimdFmov)context.CurrOp; + + if (Optimizations.UseSse2) + { + if (op.Size == 0) + { + context.Copy(GetVec(op.Rd), X86GetScalar(context, (int)op.Immediate)); + } + else + { + context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate)); + } + } + else + { + Operand e = Const(op.Immediate); + + Operand res = context.VectorZero(); + + res = EmitVectorInsert(context, res, e, 0, op.Size + 2); + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Fmov_Vi(ArmEmitterContext context) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + if (Optimizations.UseSse2) + { + if (op.RegisterSize == RegisterSize.Simd128) + { + context.Copy(GetVec(op.Rd), X86GetAllElements(context, op.Immediate)); + } + else + { + context.Copy(GetVec(op.Rd), X86GetScalar(context, op.Immediate)); + } + } + else + { + Operand e = Const(op.Immediate); + + Operand res = context.VectorZero(); + + int elems = op.RegisterSize == RegisterSize.Simd128 ? 
2 : 1; + + for (int index = 0; index < elems; index++) + { + res = EmitVectorInsert(context, res, e, index, 3); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + public static void Ins_Gp(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand n = GetIntOrZR(context, op.Rn); + + context.Copy(d, EmitVectorInsert(context, d, n, op.DstIndex, op.Size)); + } + + public static void Ins_V(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand d = GetVec(op.Rd); + Operand ne = EmitVectorExtractZx(context, op.Rn, op.SrcIndex, op.Size); + + context.Copy(d, EmitVectorInsert(context, d, ne, op.DstIndex, op.Size)); + } + + public static void Movi_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + EmitSse2VectorMoviMvniOp(context, not: false); + } + else + { + EmitVectorImmUnaryOp(context, (op1) => op1); + } + } + + public static void Mvni_V(ArmEmitterContext context) + { + if (Optimizations.UseSse2) + { + EmitSse2VectorMoviMvniOp(context, not: true); + } + else + { + EmitVectorImmUnaryOp(context, (op1) => context.BitwiseNot(op1)); + } + } + + public static void Smov_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractSx(context, op.Rn, op.DstIndex, op.Size); + + if (op.RegisterSize == RegisterSize.Simd64) + { + ne = context.ZeroExtend32(OperandType.I64, ne); + } + + SetIntOrZR(context, op.Rd, ne); + } + + public static void Tbl_V(ArmEmitterContext context) + { + EmitTableVectorLookup(context, isTbl: true); + } + + public static void Tbx_V(ArmEmitterContext context) + { + EmitTableVectorLookup(context, isTbl: false); + } + + public static void Trn1_V(ArmEmitterContext context) + { + EmitVectorTranspose(context, part: 0); + } + + public static void Trn2_V(ArmEmitterContext context) + { + EmitVectorTranspose(context, part: 1); + } + + public static void Umov_S(ArmEmitterContext context) + { + OpCodeSimdIns op = (OpCodeSimdIns)context.CurrOp; + + Operand ne = EmitVectorExtractZx(context, op.Rn, op.DstIndex, op.Size); + + SetIntOrZR(context, op.Rd, ne); + } + + public static void Uzp1_V(ArmEmitterContext context) + { + EmitVectorUnzip(context, part: 0); + } + + public static void Uzp2_V(ArmEmitterContext context) + { + EmitVectorUnzip(context, part: 1); + } + + public static void Xtn_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand d = GetVec(op.Rd); + + Operand res = context.VectorZeroUpper64(d); + + Operand mask = X86GetAllElements(context, EvenMasks[op.Size]); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, GetVec(op.Rn), mask); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, res, res2); + + context.Copy(d, res); + } + else + { + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand d = GetVec(op.Rd); + + Operand res = part == 0 ? 
context.VectorZero() : context.Copy(d); + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + res = EmitVectorInsert(context, res, ne, part + index, op.Size); + } + + context.Copy(d, res); + } + } + + public static void Zip1_V(ArmEmitterContext context) + { + EmitVectorZip(context, part: 0); + } + + public static void Zip2_V(ArmEmitterContext context) + { + EmitVectorZip(context, part: 1); + } + + private static void EmitSse2VectorMoviMvniOp(ArmEmitterContext context, bool not) + { + OpCodeSimdImm op = (OpCodeSimdImm)context.CurrOp; + + long imm = op.Immediate; + + switch (op.Size) + { + case 0: imm *= 0x01010101; break; + case 1: imm *= 0x00010001; break; + } + + if (not) + { + imm = ~imm; + } + + Operand mask; + + if (op.Size < 3) + { + mask = X86GetAllElements(context, (int)imm); + } + else + { + mask = X86GetAllElements(context, imm); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + mask = context.VectorZeroUpper64(mask); + } + + context.Copy(GetVec(op.Rd), mask); + } + + private static void EmitTableVectorLookup(ArmEmitterContext context, bool isTbl) + { + OpCodeSimdTbl op = (OpCodeSimdTbl)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand d = GetVec(op.Rd); + Operand m = GetVec(op.Rm); + + Operand res; + + Operand mask = X86GetAllElements(context, 0x0F0F0F0F0F0F0F0FL); + + // Fast path for single register table. + { + Operand n = GetVec(op.Rn); + + Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask); + mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask); + } + + for (int index = 1; index < op.Size; index++) + { + Operand ni = GetVec((op.Rn + index) & 0x1F); + + Operand idxMask = X86GetAllElements(context, 0x1010101010101010L * index); + + Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask); + + Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask); + mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + } + + if (!isTbl) + { + Operand idxMask = X86GetAllElements(context, (0x1010101010101010L * op.Size) - 0x0101010101010101L); + Operand zeroMask = context.VectorZero(); + + Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask); + Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m); + + Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask); + + Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, d, mMask); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask); + } + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + Operand d = GetVec(op.Rd); + + List<Operand> args = new List<Operand>(); + + if (!isTbl) + { + args.Add(d); + } + + args.Add(GetVec(op.Rm)); + + args.Add(Const(op.RegisterSize == RegisterSize.Simd64 ? 
8 : 16)); + + for (int index = 0; index < op.Size; index++) + { + args.Add(GetVec((op.Rn + index) & 0x1F)); + } + + MethodInfo info = null; + + if (isTbl) + { + switch (op.Size) + { + case 1: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl1)); break; + case 2: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl2)); break; + case 3: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl3)); break; + case 4: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl4)); break; + } + } + else + { + switch (op.Size) + { + case 1: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx1)); break; + case 2: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx2)); break; + case 3: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx3)); break; + case 4: info = typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx4)); break; + } + } + + context.Copy(d, context.Call(info, args.ToArray())); + } + } + + private static void EmitVectorTranspose(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + Operand mask = default; + + if (op.Size < 3) + { + long maskE0 = EvenMasks[op.Size]; + long maskE1 = OddMasks [op.Size]; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand n = GetVec(op.Rn); + + if (op.Size < 3) + { + n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + } + + Operand m = GetVec(op.Rm); + + if (op.Size < 3) + { + m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask); + } + + Intrinsic punpckInst = part == 0 + ? X86PunpcklInstruction[op.Size] + : X86PunpckhInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, pairIndex + part, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, pairIndex + part, op.Size); + + res = EmitVectorInsert(context, res, ne, pairIndex, op.Size); + res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitVectorUnzip(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSsse3) + { + if (op.RegisterSize == RegisterSize.Simd128) + { + Operand mask = default; + + if (op.Size < 3) + { + long maskE0 = EvenMasks[op.Size]; + long maskE1 = OddMasks [op.Size]; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + Operand n = GetVec(op.Rn); + + if (op.Size < 3) + { + n = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mask); + } + + Operand m = GetVec(op.Rm); + + if (op.Size < 3) + { + m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask); + } + + Intrinsic punpckInst = part == 0 + ? 
Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + Intrinsic punpcklInst = X86PunpcklInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpcklInst, n, m); + + if (op.Size < 2) + { + long maskE0 = _masksE0_Uzp[op.Size]; + long maskE1 = _masksE1_Uzp[op.Size]; + + Operand mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask); + } + + Intrinsic punpckInst = part == 0 + ? Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + res = context.AddIntrinsic(punpckInst, res, context.VectorZero()); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + for (int index = 0; index < pairs; index++) + { + int idx = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, idx + part, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, idx + part, op.Size); + + res = EmitVectorInsert(context, res, ne, index, op.Size); + res = EmitVectorInsert(context, res, me, pairs + index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitVectorZip(ArmEmitterContext context, int part) + { + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + if (Optimizations.UseSse2) + { + Operand n = GetVec(op.Rn); + Operand m = GetVec(op.Rm); + + if (op.RegisterSize == RegisterSize.Simd128) + { + Intrinsic punpckInst = part == 0 + ? X86PunpcklInstruction[op.Size] + : X86PunpckhInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpckInst, n, m); + + context.Copy(GetVec(op.Rd), res); + } + else + { + Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], n, m); + + Intrinsic punpckInst = part == 0 + ? Intrinsic.X86Punpcklqdq + : Intrinsic.X86Punpckhqdq; + + res = context.AddIntrinsic(punpckInst, res, context.VectorZero()); + + context.Copy(GetVec(op.Rd), res); + } + } + else + { + Operand res = context.VectorZero(); + + int pairs = op.GetPairsCount() >> op.Size; + + int baseIndex = part != 0 ? pairs : 0; + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + + Operand ne = EmitVectorExtractZx(context, op.Rn, baseIndex + index, op.Size); + Operand me = EmitVectorExtractZx(context, op.Rm, baseIndex + index, op.Size); + + res = EmitVectorInsert(context, res, ne, pairIndex, op.Size); + res = EmitVectorInsert(context, res, me, pairIndex + 1, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs new file mode 100644 index 00000000..b8b91b31 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdMove32.cs @@ -0,0 +1,656 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + #region "Masks" + // Same as InstEmitSimdMove, as the instructions do the same thing. 
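+ // Each constant packs eight PSHUFB byte-source indices (one per destination + // byte); the SSSE3 unzip path for doubleword vectors uses them to reorder + // the interleaved result of the initial unpack.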
+ private static readonly long[] _masksE0_Uzp = new long[] + { + 13L << 56 | 09L << 48 | 05L << 40 | 01L << 32 | 12L << 24 | 08L << 16 | 04L << 8 | 00L << 0, + 11L << 56 | 10L << 48 | 03L << 40 | 02L << 32 | 09L << 24 | 08L << 16 | 01L << 8 | 00L << 0 + }; + + private static readonly long[] _masksE1_Uzp = new long[] + { + 15L << 56 | 11L << 48 | 07L << 40 | 03L << 32 | 14L << 24 | 10L << 16 | 06L << 8 | 02L << 0, + 15L << 56 | 14L << 48 | 07L << 40 | 06L << 32 | 13L << 24 | 12L << 16 | 05L << 8 | 04L << 0 + }; + #endregion + + public static void Vmov_I(ArmEmitterContext context) + { + EmitVectorImmUnaryOp32(context, (op1) => op1); + } + + public static void Vmvn_I(ArmEmitterContext context) + { + if (Optimizations.UseAvx512Ortho) + { + EmitVectorUnaryOpSimd32(context, (op1) => + { + return context.AddIntrinsic(Intrinsic.X86Vpternlogd, op1, op1, Const(0b01010101)); + }); + } + else if (Optimizations.UseSse2) + { + EmitVectorUnaryOpSimd32(context, (op1) => + { + Operand mask = X86GetAllElements(context, -1L); + return context.AddIntrinsic(Intrinsic.X86Pandn, op1, mask); + }); + } + else + { + EmitVectorUnaryOpZx32(context, (op1) => context.BitwiseNot(op1)); + } + } + + public static void Vmvn_II(ArmEmitterContext context) + { + EmitVectorImmUnaryOp32(context, (op1) => context.BitwiseNot(op1)); + } + + public static void Vmov_GS(ArmEmitterContext context) + { + OpCode32SimdMovGp op = (OpCode32SimdMovGp)context.CurrOp; + + Operand vec = GetVecA32(op.Vn >> 2); + if (op.Op == 1) + { + // To general purpose. + Operand value = context.VectorExtract(OperandType.I32, vec, op.Vn & 0x3); + SetIntA32(context, op.Rt, value); + } + else + { + // From general purpose. + Operand value = GetIntA32(context, op.Rt); + context.Copy(vec, context.VectorInsert(vec, value, op.Vn & 0x3)); + } + } + + public static void Vmov_G1(ArmEmitterContext context) + { + OpCode32SimdMovGpElem op = (OpCode32SimdMovGpElem)context.CurrOp; + + int index = op.Index + ((op.Vd & 1) << (3 - op.Size)); + if (op.Op == 1) + { + // To general purpose. + Operand value = EmitVectorExtract32(context, op.Vd >> 1, index, op.Size, !op.U); + SetIntA32(context, op.Rt, value); + } + else + { + // From general purpose. + Operand vec = GetVecA32(op.Vd >> 1); + Operand value = GetIntA32(context, op.Rt); + context.Copy(vec, EmitVectorInsert(context, vec, value, index, op.Size)); + } + } + + public static void Vmov_G2(ArmEmitterContext context) + { + OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp; + + Operand vec = GetVecA32(op.Vm >> 2); + int vm1 = op.Vm + 1; + bool sameOwnerVec = (op.Vm >> 2) == (vm1 >> 2); + Operand vec2 = sameOwnerVec ? vec : GetVecA32(vm1 >> 2); + if (op.Op == 1) + { + // To general purpose. + Operand lowValue = context.VectorExtract(OperandType.I32, vec, op.Vm & 3); + SetIntA32(context, op.Rt, lowValue); + + Operand highValue = context.VectorExtract(OperandType.I32, vec2, vm1 & 3); + SetIntA32(context, op.Rt2, highValue); + } + else + { + // From general purpose. 
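+ // Vm and Vm + 1 can map to different backing Q vectors, so when sameOwnerVec + // is false the second insert goes through vec2 to avoid clobbering the value + // just inserted into vec.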
+ Operand lowValue = GetIntA32(context, op.Rt); + Operand resultVec = context.VectorInsert(vec, lowValue, op.Vm & 3); + + Operand highValue = GetIntA32(context, op.Rt2); + + if (sameOwnerVec) + { + context.Copy(vec, context.VectorInsert(resultVec, highValue, vm1 & 3)); + } + else + { + context.Copy(vec, resultVec); + context.Copy(vec2, context.VectorInsert(vec2, highValue, vm1 & 3)); + } + } + } + + public static void Vmov_GD(ArmEmitterContext context) + { + OpCode32SimdMovGpDouble op = (OpCode32SimdMovGpDouble)context.CurrOp; + + Operand vec = GetVecA32(op.Vm >> 1); + if (op.Op == 1) + { + // To general purpose. + Operand value = context.VectorExtract(OperandType.I64, vec, op.Vm & 1); + SetIntA32(context, op.Rt, context.ConvertI64ToI32(value)); + SetIntA32(context, op.Rt2, context.ConvertI64ToI32(context.ShiftRightUI(value, Const(32)))); + } + else + { + // From general purpose. + Operand lowValue = GetIntA32(context, op.Rt); + Operand highValue = GetIntA32(context, op.Rt2); + + Operand value = context.BitwiseOr( + context.ZeroExtend32(OperandType.I64, lowValue), + context.ShiftLeft(context.ZeroExtend32(OperandType.I64, highValue), Const(32))); + + context.Copy(vec, context.VectorInsert(vec, value, op.Vm & 1)); + } + } + + public static void Vmovl(ArmEmitterContext context) + { + OpCode32SimdLong op = (OpCode32SimdLong)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U); + + if (op.Size == 2) + { + if (op.U) + { + me = context.ZeroExtend32(OperandType.I64, me); + } + else + { + me = context.SignExtend32(OperandType.I64, me); + } + } + + res = EmitVectorInsert(context, res, me, index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vtbl(ArmEmitterContext context) + { + OpCode32SimdTbl op = (OpCode32SimdTbl)context.CurrOp; + + bool extension = op.Opc == 1; + int length = op.Length + 1; + + if (Optimizations.UseSsse3) + { + Operand d = GetVecA32(op.Qd); + Operand m = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qm), op.Vm, 0); + + Operand res; + Operand mask = X86GetAllElements(context, 0x0707070707070707L); + + // Fast path for single register table. 
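+ // PSHUFB writes zero to any destination byte whose index byte has bit 7 set; + // PCMPGTB flags index bytes above 7 and POR folds that flag into the index's + // top bit, so out-of-table lookups yield zero, as TBL requires.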
+ { + Operand n = EmitMoveDoubleWordToSide(context, GetVecA32(op.Qn), op.Vn, 0); + + Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, mask); + mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, m); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, n, mMask); + } + + for (int index = 1; index < length; index++) + { + int newVn = (op.Vn + index) & 0x1F; + (int qn, int ind) = GetQuadwordAndSubindex(newVn, op.RegisterSize); + Operand ni = EmitMoveDoubleWordToSide(context, GetVecA32(qn), newVn, 0); + + Operand idxMask = X86GetAllElements(context, 0x0808080808080808L * index); + + Operand mSubMask = context.AddIntrinsic(Intrinsic.X86Psubb, m, idxMask); + + Operand mMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, mSubMask, mask); + mMask = context.AddIntrinsic(Intrinsic.X86Por, mMask, mSubMask); + + Operand res2 = context.AddIntrinsic(Intrinsic.X86Pshufb, ni, mMask); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, res2); + } + + if (extension) + { + Operand idxMask = X86GetAllElements(context, (0x0808080808080808L * length) - 0x0101010101010101L); + Operand zeroMask = context.VectorZero(); + + Operand mPosMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, m, idxMask); + Operand mNegMask = context.AddIntrinsic(Intrinsic.X86Pcmpgtb, zeroMask, m); + + Operand mMask = context.AddIntrinsic(Intrinsic.X86Por, mPosMask, mNegMask); + + Operand dMask = context.AddIntrinsic(Intrinsic.X86Pand, EmitMoveDoubleWordToSide(context, d, op.Vd, 0), mMask); + + res = context.AddIntrinsic(Intrinsic.X86Por, res, dMask); + } + + res = EmitMoveDoubleWordToSide(context, res, 0, op.Vd); + + context.Copy(d, EmitDoubleWordInsert(context, d, res, op.Vd)); + } + else + { + int elems = op.GetBytesCount() >> op.Size; + + (int Qx, int Ix)[] tableTuples = new (int, int)[length]; + for (int i = 0; i < length; i++) + { + tableTuples[i] = GetQuadwordAndSubindex(op.Vn + i, op.RegisterSize); + } + + int byteLength = length * 8; + + Operand res = GetVecA32(op.Qd); + Operand m = GetVecA32(op.Qm); + + for (int index = 0; index < elems; index++) + { + Operand selectedIndex = context.ZeroExtend8(OperandType.I32, context.VectorExtract8(m, index + op.Im)); + + Operand inRange = context.ICompareLess(selectedIndex, Const(byteLength)); + Operand elemRes = default; // Note: This is I64 for ease of calculation. + + // TODO: Branching rather than conditional select. + + // Get indexed byte. + // To simplify (ha) the IL, we get bytes from every vector and use a nested conditional select to choose the right result. + // This does have to extract `length` times for every element, but it is certainly not as bad as it could be. + + // Which vector number the index falls in. + Operand vecIndex = context.ShiftRightUI(selectedIndex, Const(3)); + // How far to shift to extract the indexed byte. + Operand subVecIndexShift = context.ShiftLeft(context.BitwiseAnd(selectedIndex, Const(7)), Const(3)); + + for (int i = 0; i < length; i++) + { + (int qx, int ix) = tableTuples[i]; + // Get the whole vector, we'll get a byte out of it. + Operand lookupResult; + if (qx == op.Qd) + { + // Result contains the current state of the vector. + lookupResult = context.VectorExtract(OperandType.I64, res, ix); + } + else + { + lookupResult = EmitVectorExtract32(context, qx, ix, 3, false); // I64 + } + + lookupResult = context.ShiftRightUI(lookupResult, subVecIndexShift); // Get the relevant byte from this vector. + + if (i == 0) + { + elemRes = lookupResult; // First result is always default.
+ } + else + { + Operand isThisElem = context.ICompareEqual(vecIndex, Const(i)); + elemRes = context.ConditionalSelect(isThisElem, lookupResult, elemRes); + } + } + + Operand fallback = (extension) ? context.ZeroExtend32(OperandType.I64, EmitVectorExtract32(context, op.Qd, index + op.Id, 0, false)) : Const(0L); + + res = EmitVectorInsert(context, res, context.ConditionalSelect(inRange, elemRes, fallback), index + op.Id, 0); + } + + context.Copy(GetVecA32(op.Qd), res); + } + } + + public static void Vtrn(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + if (Optimizations.UseSsse3) + { + EmitVectorShuffleOpSimd32(context, (m, d) => + { + Operand mask = default; + + if (op.Size < 3) + { + long maskE0 = EvenMasks[op.Size]; + long maskE1 = OddMasks[op.Size]; + + mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + } + + if (op.Size < 3) + { + d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask); + m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask); + } + + Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m); + Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m); + + return (resM, resD); + }); + } + else + { + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand d2 = EmitVectorExtract32(context, op.Qd, pairIndex + 1 + op.Id, op.Size, false); + Operand m1 = EmitVectorExtract32(context, op.Qm, pairIndex + op.Im, op.Size, false); + + resD = EmitVectorInsert(context, resD, m1, pairIndex + 1 + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, d2, pairIndex + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + context.Copy(GetVecA32(op.Qm), resM); + } + } + } + + public static void Vzip(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Zip1V, Intrinsic.Arm64Zip2V); + } + else if (Optimizations.UseSse2) + { + EmitVectorShuffleOpSimd32(context, (m, d) => + { + if (op.RegisterSize == RegisterSize.Simd128) + { + Operand resD = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m); + Operand resM = context.AddIntrinsic(X86PunpckhInstruction[op.Size], d, m); + + return (resM, resD); + } + else + { + Operand res = context.AddIntrinsic(X86PunpcklInstruction[op.Size], d, m); + + Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero()); + Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero()); + return (resM, resD); + } + }); + } + else + { + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < pairs; index++) + { + int pairIndex = index << 1; + Operand dRowD = EmitVectorExtract32(context, op.Qd, index + op.Id, op.Size, false); + Operand mRowD = EmitVectorExtract32(context, op.Qm, index + op.Im, op.Size, false); + + Operand dRowM = EmitVectorExtract32(context, op.Qd, index + op.Id + pairs, op.Size, false); + Operand mRowM = EmitVectorExtract32(context, op.Qm, index + op.Im + pairs, op.Size, 
false); + + resD = EmitVectorInsert(context, resD, dRowD, pairIndex + op.Id, op.Size); + resD = EmitVectorInsert(context, resD, mRowD, pairIndex + 1 + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, dRowM, pairIndex + op.Im, op.Size); + resM = EmitVectorInsert(context, resM, mRowM, pairIndex + 1 + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + context.Copy(GetVecA32(op.Qm), resM); + } + } + } + + public static void Vuzp(ArmEmitterContext context) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + EmitVectorZipUzpOpSimd32(context, Intrinsic.Arm64Uzp1V, Intrinsic.Arm64Uzp2V); + } + else if (Optimizations.UseSsse3) + { + EmitVectorShuffleOpSimd32(context, (m, d) => + { + if (op.RegisterSize == RegisterSize.Simd128) + { + Operand mask = default; + + if (op.Size < 3) + { + long maskE0 = EvenMasks[op.Size]; + long maskE1 = OddMasks[op.Size]; + + mask = X86GetScalar(context, maskE0); + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + d = context.AddIntrinsic(Intrinsic.X86Pshufb, d, mask); + m = context.AddIntrinsic(Intrinsic.X86Pshufb, m, mask); + } + + Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, d, m); + Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, d, m); + + return (resM, resD); + } + else + { + Intrinsic punpcklInst = X86PunpcklInstruction[op.Size]; + + Operand res = context.AddIntrinsic(punpcklInst, d, m); + + if (op.Size < 2) + { + long maskE0 = _masksE0_Uzp[op.Size]; + long maskE1 = _masksE1_Uzp[op.Size]; + + Operand mask = X86GetScalar(context, maskE0); + + mask = EmitVectorInsert(context, mask, Const(maskE1), 1, 3); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask); + } + + Operand resD = context.AddIntrinsic(Intrinsic.X86Punpcklqdq, res, context.VectorZero()); + Operand resM = context.AddIntrinsic(Intrinsic.X86Punpckhqdq, res, context.VectorZero()); + + return (resM, resD); + } + }); + } + else + { + int elems = op.GetBytesCount() >> op.Size; + int pairs = elems >> 1; + + bool overlap = op.Qm == op.Qd; + + Operand resD = GetVecA32(op.Qd); + Operand resM = GetVecA32(op.Qm); + + for (int index = 0; index < elems; index++) + { + Operand dIns, mIns; + if (index >= pairs) + { + int pairIndex = index - pairs; + dIns = EmitVectorExtract32(context, op.Qm, (pairIndex << 1) + op.Im, op.Size, false); + mIns = EmitVectorExtract32(context, op.Qm, ((pairIndex << 1) | 1) + op.Im, op.Size, false); + } + else + { + dIns = EmitVectorExtract32(context, op.Qd, (index << 1) + op.Id, op.Size, false); + mIns = EmitVectorExtract32(context, op.Qd, ((index << 1) | 1) + op.Id, op.Size, false); + } + + resD = EmitVectorInsert(context, resD, dIns, index + op.Id, op.Size); + + if (overlap) + { + resM = resD; + } + + resM = EmitVectorInsert(context, resM, mIns, index + op.Im, op.Size); + + if (overlap) + { + resD = resM; + } + } + + context.Copy(GetVecA32(op.Qd), resD); + if (!overlap) + { + context.Copy(GetVecA32(op.Qm), resM); + } + } + } + + private static void EmitVectorZipUzpOpSimd32(ArmEmitterContext context, Intrinsic inst1, Intrinsic inst2) + { + OpCode32SimdCmpZ op = (OpCode32SimdCmpZ)context.CurrOp; + + bool overlap = op.Qm == op.Qd; + + Operand d = GetVecA32(op.Qd); + Operand m = GetVecA32(op.Qm); + + Operand dPart = d; + Operand mPart = m; + + if (!op.Q) // Register swap: move relevant doubleword to destination side. 
+ { + dPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, d, op.Vd, 0); + mPart = InstEmitSimdHelper32Arm64.EmitMoveDoubleWordToSide(context, m, op.Vm, 0); + } + + Intrinsic vSize = op.Q ? Intrinsic.Arm64V128 : Intrinsic.Arm64V64; + + vSize |= (Intrinsic)(op.Size << (int)Intrinsic.Arm64VSizeShift); + + Operand resD = context.AddIntrinsic(inst1 | vSize, dPart, mPart); + Operand resM = context.AddIntrinsic(inst2 | vSize, dPart, mPart); + + if (!op.Q) // Register insert. + { + resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, d, Const(op.Vd & 1), resD, Const(0)); + + if (overlap) + { + resD = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, resD, Const(op.Vm & 1), resM, Const(0)); + } + else + { + resM = context.AddIntrinsic(Intrinsic.Arm64InsVe | Intrinsic.Arm64VDWord, m, Const(op.Vm & 1), resM, Const(0)); + } + } + + context.Copy(d, resD); + if (!overlap) + { + context.Copy(m, resM); + } + } + + private static void EmitVectorShuffleOpSimd32(ArmEmitterContext context, Func<Operand, Operand, (Operand, Operand)> shuffleFunc) + { + OpCode32Simd op = (OpCode32Simd)context.CurrOp; + + Operand m = GetVecA32(op.Qm); + Operand d = GetVecA32(op.Qd); + Operand initialM = m; + Operand initialD = d; + + if (!op.Q) // Register swap: move relevant doubleword to side 0, for consistency. + { + m = EmitMoveDoubleWordToSide(context, m, op.Vm, 0); + d = EmitMoveDoubleWordToSide(context, d, op.Vd, 0); + } + + (Operand resM, Operand resD) = shuffleFunc(m, d); + + bool overlap = op.Qm == op.Qd; + + if (!op.Q) // Register insert. + { + resM = EmitDoubleWordInsert(context, initialM, EmitMoveDoubleWordToSide(context, resM, 0, op.Vm), op.Vm); + resD = EmitDoubleWordInsert(context, overlap ? resM : initialD, EmitMoveDoubleWordToSide(context, resD, 0, op.Vd), op.Vd); + } + + if (!overlap) + { + context.Copy(initialM, resM); + } + + context.Copy(initialD, resD); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift.cs new file mode 100644 index 00000000..19e41119 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdShift.cs @@ -0,0 +1,1827 @@ +// https://github.com/intel/ARM_NEON_2_x86_SSE/blob/master/NEON_2_SSE.h + +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + using Func2I = Func<Operand, Operand, Operand>; + + static partial class InstEmit + { +#region "Masks" + private static readonly long[] _masks_SliSri = new long[] // Replication masks. 
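+        // Multiplying a mask that fits in a single element by one of these constants
+        // broadcasts it to every element of a 64-bit value (e.g. for byte elements,
+        // m * 0x0101010101010101 places m in all eight byte lanes).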
+ { + 0x0101010101010101L, 0x0001000100010001L, 0x0000000100000001L, 0x0000000000000001L + }; +#endregion + + public static void Rshrn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64RshrnV, shift); + } + else if (Optimizations.UseSsse3) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand dLow = context.VectorZeroUpper64(d); + + Operand mask = default; + + switch (op.Size + 1) + { + case 1: mask = X86GetAllElements(context, (int)roundConst * 0x00010001); break; + case 2: mask = X86GetAllElements(context, (int)roundConst); break; + case 3: mask = X86GetAllElements(context, roundConst); break; + } + + Intrinsic addInst = X86PaddInstruction[op.Size + 1]; + + Operand res = context.AddIntrinsic(addInst, n, mask); + + Intrinsic srlInst = X86PsrlInstruction[op.Size + 1]; + + res = context.AddIntrinsic(srlInst, res, Const(shift)); + + Operand mask2 = X86GetAllElements(context, EvenMasks[op.Size]); + + res = context.AddIntrinsic(Intrinsic.X86Pshufb, res, mask2); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, dLow, res); + + context.Copy(d, res); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: true); + } + } + + public static void Shl_S(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64ShlS, shift); + } + else + { + EmitScalarUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } + } + + public static void Shl_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + int eSize = 8 << op.Size; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64ShlV, shift); + } + else if (shift >= eSize) + { + if ((op.RegisterSize == RegisterSize.Simd64)) + { + Operand res = context.VectorZeroUpper64(GetVec(op.Rd)); + + context.Copy(GetVec(op.Rd), res); + } + } + else if (Optimizations.UseGfni && op.Size == 0) + { + Operand n = GetVec(op.Rn); + + ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift); + + Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix); + + Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorUnaryOpZx(context, (op1) => context.ShiftLeft(op1, Const(shift))); + } + } + + public static void Shll_V(ArmEmitterContext context) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + int shift = 8 << op.Size; + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorUnaryOp(context, 
Intrinsic.Arm64ShllV); + } + else if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + } + + Intrinsic movsxInst = X86PmovsxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movsxInst, n); + + Intrinsic sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Shrn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64ShrnV, shift); + } + else if (Optimizations.UseSsse3) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Operand dLow = context.VectorZeroUpper64(d); + + Intrinsic srlInst = X86PsrlInstruction[op.Size + 1]; + + Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift)); + + Operand mask = X86GetAllElements(context, EvenMasks[op.Size]); + + Operand res = context.AddIntrinsic(Intrinsic.X86Pshufb, nShifted, mask); + + Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 + ? Intrinsic.X86Movlhps + : Intrinsic.X86Movhlps; + + res = context.AddIntrinsic(movInst, dLow, res); + + context.Copy(d, res); + } + else + { + EmitVectorShrImmNarrowOpZx(context, round: false); + } + } + + public static void Sli_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SliS, shift); + } + else + { + EmitSli(context, scalar: true); + } + } + + public static void Sli_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SliV, shift); + } + else + { + EmitSli(context, scalar: false); + } + } + + public static void Sqrshl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round | ShlRegFlags.Saturating); + } + } + + public static void Sqrshrn_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnS, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + } + + public static void Sqrshrn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrnV, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + } + + public static void Sqrshrun_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = 
(OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunS, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + } + + public static void Sqrshrun_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqrshrunV, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + } + + public static void Sqshl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64SqshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Saturating); + } + } + + public static void Sqshrn_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnS, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxSx); + } + } + + public static void Sqshrn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrnV, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxSx); + } + } + + public static void Sqshrun_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunS, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarSxZx); + } + } + + public static void Sqshrun_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64SqshrunV, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + } + + public static void Sri_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SriS, shift); + } + else + { + EmitSri(context, scalar: true); + } + } + + public static void Sri_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SriV, shift); + } + else + { + EmitSri(context, scalar: false); + } + } + + public static void Srshl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed | ShlRegFlags.Round); + } + } + + public static void Srshr_S(ArmEmitterContext context) + { + if 
(Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SrshrS, shift); + } + else + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round); + } + } + + public static void Srshr_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SrshrV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Intrinsic sraInst = X86PsraInstruction[op.Size]; + + Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSra); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round); + } + } + + public static void Srsra_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SrsraS, shift); + } + else + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Srsra_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SrsraV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Intrinsic sraInst = X86PsraInstruction[op.Size]; + + Operand nSra = context.AddIntrinsic(sraInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSra); + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Sshl_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64SshlS); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Scalar | ShlRegFlags.Signed); + } + } + + public static void Sshl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64SshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Signed); + } + } + + public static void Sshll_V(ArmEmitterContext context) + { 
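+            // SSHLL/SSHLL2: sign-extend each element of the selected source half to twice
+            // its width, then shift left by the immediate (e.g. SSHLL V0.8H, V1.8B, #3
+            // widens eight signed bytes to halfwords and shifts each left by 3).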
+ OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshllV, shift); + } + else if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + } + + Intrinsic movsxInst = X86PmovsxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movsxInst, n); + + if (shift != 0) + { + Intrinsic sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinarySx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Sshr_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64SshrS, shift); + } + else + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx); + } + } + + public static void Sshr_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64SshrV, shift); + } + else if (Optimizations.UseGfni && op.Size == 0) + { + Operand n = GetVec(op.Rn); + + ulong bitMatrix; + + if (shift < 8) + { + bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift); + + // Extend sign-bit + bitMatrix |= 0x8080808080808080UL >> (64 - shift * 8); + } + else + { + // Replicate sign-bit into all bits + bitMatrix = 0x8080808080808080UL; + } + + Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix); + + Operand res = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + Operand n = GetVec(op.Rn); + + Intrinsic sraInst = X86PsraInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sraInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorSx); + } + } + + public static void Ssra_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64SsraS, shift); + } + else + { + EmitScalarShrImmOpSx(context, ShrImmFlags.Accumulate); + } + } + + public static void Ssra_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64SsraV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0 && op.Size < 3) + { + int shift = GetImmShr(op); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sraInst = X86PsraInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sraInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == 
RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorShrImmOpSx(context, ShrImmFlags.Accumulate); + } + } + + public static void Uqrshl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Round | ShlRegFlags.Saturating); + } + } + + public static void Uqrshrn_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnS, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + } + + public static void Uqrshrn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqrshrnV, shift); + } + else + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + } + + public static void Uqshl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorSaturatingBinaryOp(context, Intrinsic.Arm64UqshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Saturating); + } + } + + public static void Uqshrn_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnS, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.ScalarZxZx); + } + } + + public static void Uqshrn_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorSaturatingShiftTernaryOpRd(context, Intrinsic.Arm64UqshrnV, shift); + } + else + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorZxZx); + } + } + + public static void Urshl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UrshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Round); + } + } + + public static void Urshr_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UrshrS, shift); + } + else + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round); + } + } + + public static void Urshr_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UrshrV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, 
Const(eSize - 1)); + + Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSrl); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round); + } + } + + public static void Ursra_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UrsraS, shift); + } + else + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Ursra_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UrsraV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand res = context.AddIntrinsic(sllInst, n, Const(eSize - shift)); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + res = context.AddIntrinsic(srlInst, res, Const(eSize - 1)); + + Operand nSrl = context.AddIntrinsic(srlInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, nSrl); + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Round | ShrImmFlags.Accumulate); + } + } + + public static void Ushl_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitScalarBinaryOp(context, Intrinsic.Arm64UshlS); + } + else + { + EmitShlRegOp(context, ShlRegFlags.Scalar); + } + } + + public static void Ushl_V(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorBinaryOp(context, Intrinsic.Arm64UshlV); + } + else + { + EmitShlRegOp(context, ShlRegFlags.None); + } + } + + public static void Ushll_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + + if (Optimizations.UseAdvSimd) + { + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshllV, shift); + } + else if (Optimizations.UseSse41) + { + Operand n = GetVec(op.Rn); + + if (op.RegisterSize == RegisterSize.Simd128) + { + n = context.AddIntrinsic(Intrinsic.X86Psrldq, n, Const(8)); + } + + Intrinsic movzxInst = X86PmovzxInstruction[op.Size]; + + Operand res = context.AddIntrinsic(movzxInst, n); + + if (shift != 0) + { + Intrinsic sllInst = X86PsllInstruction[op.Size + 1]; + + res = context.AddIntrinsic(sllInst, res, Const(shift)); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitVectorShImmWidenBinaryZx(context, (op1, op2) => context.ShiftLeft(op1, op2), shift); + } + } + + public static void Ushr_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftBinaryOp(context, Intrinsic.Arm64UshrS, shift); + } 
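+            // Portable fallback: EmitShrImmOp performs the shift in the IR
+            // (ShrImmFlags.ScalarZx = scalar source, zero-extended).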
+ else + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx); + } + } + + public static void Ushr_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftBinaryOp(context, Intrinsic.Arm64UshrV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + + Operand n = GetVec(op.Rn); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + Operand res = context.AddIntrinsic(srlInst, n, Const(shift)); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(GetVec(op.Rd), res); + } + else + { + EmitShrImmOp(context, ShrImmFlags.VectorZx); + } + } + + public static void Usra_S(ArmEmitterContext context) + { + if (Optimizations.UseAdvSimd) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitScalarShiftTernaryOpRd(context, Intrinsic.Arm64UsraS, shift); + } + else + { + EmitScalarShrImmOpZx(context, ShrImmFlags.Accumulate); + } + } + + public static void Usra_V(ArmEmitterContext context) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + if (Optimizations.UseAdvSimd) + { + int shift = GetImmShr(op); + + InstEmitSimdHelperArm64.EmitVectorShiftTernaryOpRd(context, Intrinsic.Arm64UsraV, shift); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + int shift = GetImmShr(op); + + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + Operand res = context.AddIntrinsic(srlInst, n, Const(shift)); + + Intrinsic addInst = X86PaddInstruction[op.Size]; + + res = context.AddIntrinsic(addInst, res, d); + + if (op.RegisterSize == RegisterSize.Simd64) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + EmitVectorShrImmOpZx(context, ShrImmFlags.Accumulate); + } + } + + [Flags] + private enum ShrImmFlags + { + Scalar = 1 << 0, + Signed = 1 << 1, + + Round = 1 << 2, + Accumulate = 1 << 3, + + ScalarSx = Scalar | Signed, + ScalarZx = Scalar, + + VectorSx = Signed, + VectorZx = 0 + } + + private static void EmitScalarShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarSx | flags); + } + + private static void EmitScalarShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.ScalarZx | flags); + } + + private static void EmitVectorShrImmOpSx(ArmEmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorSx | flags); + } + + private static void EmitVectorShrImmOpZx(ArmEmitterContext context, ShrImmFlags flags) + { + EmitShrImmOp(context, ShrImmFlags.VectorZx | flags); + } + + private static void EmitShrImmOp(ArmEmitterContext context, ShrImmFlags flags) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + Operand res = context.VectorZero(); + + bool scalar = (flags & ShrImmFlags.Scalar) != 0; + bool signed = (flags & ShrImmFlags.Signed) != 0; + bool round = (flags & ShrImmFlags.Round) != 0; + bool accumulate = (flags & ShrImmFlags.Accumulate) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = !scalar ? 
op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + + if (op.Size <= 2) + { + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = signed ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift)); + } + else /* if (op.Size == 3) */ + { + e = EmitShrImm64(context, e, signed, round ? roundConst : 0L, shift); + } + + if (accumulate) + { + Operand de = EmitVectorExtract(context, op.Rd, index, op.Size, signed); + + e = context.Add(e, de); + } + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitVectorShrImmNarrowOpZx(ArmEmitterContext context, bool round) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + Operand d = GetVec(op.Rd); + + Operand res = part == 0 ? context.VectorZero() : context.Copy(d); + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtractZx(context, op.Rn, index, op.Size + 1); + + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = context.ShiftRightUI(e, Const(shift)); + + res = EmitVectorInsert(context, res, e, part + index, op.Size); + } + + context.Copy(d, res); + } + + [Flags] + private enum ShrImmSaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + Round = 1 << 3, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags); + } + + private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; + bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0; + + int shift = GetImmShr(op); + + long roundConst = 1L << (shift - 1); + + int elems = !scalar ? 8 >> op.Size : 1; + + int part = !scalar && (op.RegisterSize == RegisterSize.Simd128) ? elems : 0; + + Operand d = GetVec(op.Rd); + + Operand res = part == 0 ? context.VectorZero() : context.Copy(d); + + for (int index = 0; index < elems; index++) + { + Operand e = EmitVectorExtract(context, op.Rn, index, op.Size + 1, signedSrc); + + if (op.Size <= 1 || !round) + { + if (round) + { + e = context.Add(e, Const(roundConst)); + } + + e = signedSrc ? context.ShiftRightSI(e, Const(shift)) : context.ShiftRightUI(e, Const(shift)); + } + else /* if (op.Size == 2 && round) */ + { + e = EmitShrImm64(context, e, signedSrc, roundConst, shift); // shift <= 32 + } + + e = signedSrc ? 
EmitSignedSrcSatQ(context, e, op.Size, signedDst) : EmitUnsignedSrcSatQ(context, e, op.Size, signedDst); + + res = EmitVectorInsert(context, res, e, part + index, op.Size); + } + + context.Copy(d, res); + } + + // dst64 = (Int(src64, signed) + roundConst) >> shift; + private static Operand EmitShrImm64( + ArmEmitterContext context, + Operand value, + bool signed, + long roundConst, + int shift) + { + MethodInfo info = signed + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64)); + + return context.Call(info, value, Const(roundConst), Const(shift)); + } + + private static void EmitVectorShImmWidenBinarySx(ArmEmitterContext context, Func2I emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: true); + } + + private static void EmitVectorShImmWidenBinaryZx(ArmEmitterContext context, Func2I emit, int imm) + { + EmitVectorShImmWidenBinaryOp(context, emit, imm, signed: false); + } + + private static void EmitVectorShImmWidenBinaryOp(ArmEmitterContext context, Func2I emit, int imm, bool signed) + { + OpCodeSimd op = (OpCodeSimd)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = 8 >> op.Size; + + int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, part + index, op.Size, signed); + + res = EmitVectorInsert(context, res, emit(ne, Const(imm)), index, op.Size + 1); + } + + context.Copy(GetVec(op.Rd), res); + } + + private static void EmitSli(ArmEmitterContext context, bool scalar) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShl(op); + int eSize = 8 << op.Size; + + ulong mask = shift != 0 ? ulong.MaxValue >> (64 - shift) : 0UL; + + if (shift >= eSize) + { + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + Operand res = context.VectorZeroUpper64(GetVec(op.Rd)); + + context.Copy(GetVec(op.Rd), res); + } + } + else if (Optimizations.UseGfni && op.Size == 0) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(shift); + + Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix); + + Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0)); + + Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); + + Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic sllInst = X86PsllInstruction[op.Size]; + + Operand nShifted = context.AddIntrinsic(sllInst, n, Const(shift)); + + Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); + + Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + Operand res = context.VectorZero(); + + int elems = !scalar ? 
op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand neShifted = context.ShiftLeft(ne, Const(shift)); + + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + private static void EmitSri(ArmEmitterContext context, bool scalar) + { + OpCodeSimdShImm op = (OpCodeSimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + int eSize = 8 << op.Size; + + ulong mask = (ulong.MaxValue << (eSize - shift)) & (ulong.MaxValue >> (64 - eSize)); + + if (shift >= eSize) + { + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + Operand res = context.VectorZeroUpper64(GetVec(op.Rd)); + + context.Copy(GetVec(op.Rd), res); + } + } + else if (Optimizations.UseGfni && op.Size == 0) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + ulong bitMatrix = X86GetGf2p8LogicalShiftLeft(-shift); + + Operand vBitMatrix = X86GetElements(context, bitMatrix, bitMatrix); + + Operand nShifted = context.AddIntrinsic(Intrinsic.X86Gf2p8affineqb, n, vBitMatrix, Const(0)); + + Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); + + Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else if (Optimizations.UseSse2 && op.Size > 0) + { + Operand d = GetVec(op.Rd); + Operand n = GetVec(op.Rn); + + Intrinsic srlInst = X86PsrlInstruction[op.Size]; + + Operand nShifted = context.AddIntrinsic(srlInst, n, Const(shift)); + + Operand dMask = X86GetAllElements(context, (long)mask * _masks_SliSri[op.Size]); + + Operand dMasked = context.AddIntrinsic(Intrinsic.X86Pand, d, dMask); + + Operand res = context.AddIntrinsic(Intrinsic.X86Por, nShifted, dMasked); + + if ((op.RegisterSize == RegisterSize.Simd64) || scalar) + { + res = context.VectorZeroUpper64(res); + } + + context.Copy(d, res); + } + else + { + Operand res = context.VectorZero(); + + int elems = !scalar ? op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtractZx(context, op.Rn, index, op.Size); + + Operand neShifted = shift != 64 ? context.ShiftRightUI(ne, Const(shift)) : Const(0UL); + + Operand de = EmitVectorExtractZx(context, op.Rd, index, op.Size); + + Operand deMasked = context.BitwiseAnd(de, Const(mask)); + + Operand e = context.BitwiseOr(neShifted, deMasked); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + } + + [Flags] + private enum ShlRegFlags + { + None = 0, + Scalar = 1 << 0, + Signed = 1 << 1, + Round = 1 << 2, + Saturating = 1 << 3 + } + + private static void EmitShlRegOp(ArmEmitterContext context, ShlRegFlags flags = ShlRegFlags.None) + { + bool scalar = flags.HasFlag(ShlRegFlags.Scalar); + bool signed = flags.HasFlag(ShlRegFlags.Signed); + bool round = flags.HasFlag(ShlRegFlags.Round); + bool saturating = flags.HasFlag(ShlRegFlags.Saturating); + + OpCodeSimdReg op = (OpCodeSimdReg)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = !scalar ? 
op.GetBytesCount() >> op.Size : 1; + + for (int index = 0; index < elems; index++) + { + Operand ne = EmitVectorExtract(context, op.Rn, index, op.Size, signed); + Operand me = EmitVectorExtractSx(context, op.Rm, index << op.Size, size: 0); + + Operand e = !saturating + ? EmitShlReg(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed) + : EmitShlRegSatQ(context, ne, context.ConvertI64ToI32(me), round, op.Size, signed); + + res = EmitVectorInsert(context, res, e, index, op.Size); + } + + context.Copy(GetVec(op.Rd), res); + } + + // long SignedShlReg(long op, int shiftLsB, bool round, int size); + // ulong UnsignedShlReg(ulong op, int shiftLsB, bool round, int size); + private static Operand EmitShlReg(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed) + { + int eSize = 8 << size; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(shiftLsB.Type == OperandType.I32); + Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64); + + Operand lbl1 = Label(); + Operand lblEnd = Label(); + + Operand eSizeOp = Const(eSize); + Operand zero = Const(0); + Operand zeroL = Const(0L); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); + + context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual); + context.Copy(res, signed + ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize) + : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual); + Operand shl = context.ShiftLeft(op, shiftLsB); + Operand isGreaterOrEqual = context.ICompareGreaterOrEqual(shiftLsB, eSizeOp); + context.Copy(res, context.ConditionalSelect(isGreaterOrEqual, zeroL, shl)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // long SignedShlRegSatQ(long op, int shiftLsB, bool round, int size); + // ulong UnsignedShlRegSatQ(ulong op, int shiftLsB, bool round, int size); + private static Operand EmitShlRegSatQ(ArmEmitterContext context, Operand op, Operand shiftLsB, bool round, int size, bool signed) + { + int eSize = 8 << size; + + Debug.Assert(op.Type == OperandType.I64); + Debug.Assert(shiftLsB.Type == OperandType.I32); + Debug.Assert(eSize == 8 || eSize == 16 || eSize == 32 || eSize == 64); + + Operand lbl1 = Label(); + Operand lbl2 = Label(); + Operand lblEnd = Label(); + + Operand eSizeOp = Const(eSize); + Operand zero = Const(0); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), op); + + context.BranchIf(lbl1, shiftLsB, zero, Comparison.GreaterOrEqual); + context.Copy(res, signed + ? EmitSignedShrReg(context, op, context.Negate(shiftLsB), round, eSize) + : EmitUnsignedShrReg(context, op, context.Negate(shiftLsB), round, eSize)); + context.Branch(lblEnd); + + context.MarkLabel(lbl1); + context.BranchIf(lblEnd, shiftLsB, zero, Comparison.LessOrEqual); + context.BranchIf(lbl2, shiftLsB, eSizeOp, Comparison.Less); + context.Copy(res, signed + ? EmitSignedSignSatQ(context, op, size) + : EmitUnsignedSignSatQ(context, op, size)); + context.Branch(lblEnd); + + context.MarkLabel(lbl2); + Operand shl = context.ShiftLeft(op, shiftLsB); + if (eSize == 64) + { + Operand sarOrShr = signed + ? context.ShiftRightSI(shl, shiftLsB) + : context.ShiftRightUI(shl, shiftLsB); + context.Copy(res, shl); + context.BranchIf(lblEnd, sarOrShr, op, Comparison.Equal); + context.Copy(res, signed + ? 
EmitSignedSignSatQ(context, op, size) + : EmitUnsignedSignSatQ(context, op, size)); + } + else + { + context.Copy(res, signed + ? EmitSignedSrcSatQ(context, shl, size, signedDst: true) + : EmitUnsignedSrcSatQ(context, shl, size, signedDst: false)); + } + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + + // shift := [1, 128]; eSize := {8, 16, 32, 64}. + // long SignedShrReg(long op, int shift, bool round, int eSize); + private static Operand EmitSignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize) + { + if (round) + { + Operand lblEnd = Label(); + + Operand eSizeOp = Const(eSize); + Operand zeroL = Const(0L); + Operand one = Const(1); + Operand oneL = Const(1L); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroL); + + context.BranchIf(lblEnd, shift, eSizeOp, Comparison.GreaterOrEqual); + Operand roundConst = context.ShiftLeft(oneL, context.Subtract(shift, one)); + Operand add = context.Add(op, roundConst); + Operand sar = context.ShiftRightSI(add, shift); + if (eSize == 64) + { + Operand shr = context.ShiftRightUI(add, shift); + Operand left = context.BitwiseAnd(context.Negate(op), context.BitwiseExclusiveOr(op, add)); + Operand isLess = context.ICompareLess(left, zeroL); + context.Copy(res, context.ConditionalSelect(isLess, shr, sar)); + } + else + { + context.Copy(res, sar); + } + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + else + { + Operand lblEnd = Label(); + + Operand eSizeOp = Const(eSize); + Operand zeroL = Const(0L); + Operand negOneL = Const(-1L); + + Operand sar = context.ShiftRightSI(op, shift); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), sar); + + context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less); + Operand isLess = context.ICompareLess(op, zeroL); + context.Copy(res, context.ConditionalSelect(isLess, negOneL, zeroL)); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + } + + // shift := [1, 128]; eSize := {8, 16, 32, 64}. 
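+        // Worked example (round = true): op = 0xFF, shift = 4 =>
+        // roundConst = 1 << 3 = 8, (0xFF + 8) >> 4 = 0x10.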
+ // ulong UnsignedShrReg(ulong op, int shift, bool round, int eSize); + private static Operand EmitUnsignedShrReg(ArmEmitterContext context, Operand op, Operand shift, bool round, int eSize) + { + if (round) + { + Operand lblEnd = Label(); + + Operand zeroUL = Const(0UL); + Operand one = Const(1); + Operand oneUL = Const(1UL); + Operand eSizeMaxOp = Const(64); + Operand oneShl63UL = Const(1UL << 63); + + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), zeroUL); + + context.BranchIf(lblEnd, shift, eSizeMaxOp, Comparison.Greater); + Operand roundConst = context.ShiftLeft(oneUL, context.Subtract(shift, one)); + Operand add = context.Add(op, roundConst); + Operand shr = context.ShiftRightUI(add, shift); + Operand isEqual = context.ICompareEqual(shift, eSizeMaxOp); + context.Copy(res, context.ConditionalSelect(isEqual, zeroUL, shr)); + if (eSize == 64) + { + context.BranchIf(lblEnd, add, op, Comparison.GreaterOrEqualUI); + Operand right = context.BitwiseOr(shr, context.ShiftRightUI(oneShl63UL, context.Subtract(shift, one))); + context.Copy(res, context.ConditionalSelect(isEqual, oneUL, right)); + } + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + else + { + Operand lblEnd = Label(); + + Operand eSizeOp = Const(eSize); + Operand zeroUL = Const(0UL); + + Operand shr = context.ShiftRightUI(op, shift); + Operand res = context.Copy(context.AllocateLocal(OperandType.I64), shr); + + context.BranchIf(lblEnd, shift, eSizeOp, Comparison.Less); + context.Copy(res, zeroUL); + context.Branch(lblEnd); + + context.MarkLabel(lblEnd); + + return res; + } + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs new file mode 100644 index 00000000..9ac68088 --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSimdShift32.cs @@ -0,0 +1,389 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Diagnostics; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper; +using static ARMeilleure.Instructions.InstEmitSimdHelper32; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit32 + { + public static void Vqrshrn(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + EmitRoundShrImmSaturatingNarrowOp(context, op.U ? ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Vqrshrun(ArmEmitterContext context) + { + EmitRoundShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Vqshrn(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + EmitShrImmSaturatingNarrowOp(context, op.U ? 
ShrImmSaturatingNarrowFlags.VectorZxZx : ShrImmSaturatingNarrowFlags.VectorSxSx); + } + + public static void Vqshrun(ArmEmitterContext context) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.VectorSxZx); + } + + public static void Vrshr(ArmEmitterContext context) + { + EmitRoundShrImmOp(context, accumulate: false); + } + + public static void Vrshrn(ArmEmitterContext context) + { + EmitRoundShrImmNarrowOp(context, signed: false); + } + + public static void Vrsra(ArmEmitterContext context) + { + EmitRoundShrImmOp(context, accumulate: true); + } + + public static void Vshl(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + EmitVectorUnaryOpZx32(context, (op1) => context.ShiftLeft(op1, Const(op.Shift))); + } + + public static void Vshl_I(ArmEmitterContext context) + { + OpCode32SimdReg op = (OpCode32SimdReg)context.CurrOp; + + if (op.U) + { + EmitVectorBinaryOpZx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, true)); + } + else + { + EmitVectorBinaryOpSx32(context, (op1, op2) => EmitShlRegOp(context, op2, op1, op.Size, false)); + } + } + + public static void Vshll(ArmEmitterContext context) + { + OpCode32SimdShImmLong op = (OpCode32SimdShImmLong)context.CurrOp; + + Operand res = context.VectorZero(); + + int elems = op.GetBytesCount() >> op.Size; + + for (int index = 0; index < elems; index++) + { + Operand me = EmitVectorExtract32(context, op.Qm, op.Im + index, op.Size, !op.U); + + if (op.Size == 2) + { + if (op.U) + { + me = context.ZeroExtend32(OperandType.I64, me); + } + else + { + me = context.SignExtend32(OperandType.I64, me); + } + } + + me = context.ShiftLeft(me, Const(op.Shift)); + + res = EmitVectorInsert(context, res, me, index, op.Size + 1); + } + + context.Copy(GetVecA32(op.Qd), res); + } + + public static void Vshr(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = GetImmShr(op); + int maxShift = (8 << op.Size) - 1; + + if (op.U) + { + EmitVectorUnaryOpZx32(context, (op1) => (shift > maxShift) ? Const(op1.Type, 0) : context.ShiftRightUI(op1, Const(shift))); + } + else + { + EmitVectorUnaryOpSx32(context, (op1) => context.ShiftRightSI(op1, Const(Math.Min(maxShift, shift)))); + } + } + + public static void Vshrn(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = GetImmShr(op); + + EmitVectorUnaryNarrowOp32(context, (op1) => context.ShiftRightUI(op1, Const(shift))); + } + + public static void Vsra(ArmEmitterContext context) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = GetImmShr(op); + int maxShift = (8 << op.Size) - 1; + + if (op.U) + { + EmitVectorImmBinaryQdQmOpZx32(context, (op1, op2) => + { + Operand shiftRes = shift > maxShift ? 
Const(op2.Type, 0) : context.ShiftRightUI(op2, Const(shift)); + + return context.Add(op1, shiftRes); + }); + } + else + { + EmitVectorImmBinaryQdQmOpSx32(context, (op1, op2) => context.Add(op1, context.ShiftRightSI(op2, Const(Math.Min(maxShift, shift))))); + } + } + + public static void EmitRoundShrImmOp(ArmEmitterContext context, bool accumulate) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + int shift = GetImmShr(op); + long roundConst = 1L << (shift - 1); + + if (op.U) + { + if (op.Size < 2) + { + EmitVectorUnaryOpZx32(context, (op1) => + { + op1 = context.Add(op1, Const(op1.Type, roundConst)); + + return context.ShiftRightUI(op1, Const(shift)); + }, accumulate); + } + else if (op.Size == 2) + { + EmitVectorUnaryOpZx32(context, (op1) => + { + op1 = context.ZeroExtend32(OperandType.I64, op1); + op1 = context.Add(op1, Const(op1.Type, roundConst)); + + return context.ConvertI64ToI32(context.ShiftRightUI(op1, Const(shift))); + }, accumulate); + } + else /* if (op.Size == 3) */ + { + EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: false, roundConst, shift), accumulate); + } + } + else + { + if (op.Size < 2) + { + EmitVectorUnaryOpSx32(context, (op1) => + { + op1 = context.Add(op1, Const(op1.Type, roundConst)); + + return context.ShiftRightSI(op1, Const(shift)); + }, accumulate); + } + else if (op.Size == 2) + { + EmitVectorUnaryOpSx32(context, (op1) => + { + op1 = context.SignExtend32(OperandType.I64, op1); + op1 = context.Add(op1, Const(op1.Type, roundConst)); + + return context.ConvertI64ToI32(context.ShiftRightSI(op1, Const(shift))); + }, accumulate); + } + else /* if (op.Size == 3) */ + { + EmitVectorUnaryOpZx32(context, (op1) => EmitShrImm64(context, op1, signed: true, roundConst, shift), accumulate); + } + } + } + + private static void EmitRoundShrImmNarrowOp(ArmEmitterContext context, bool signed) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + int shift = GetImmShr(op); + long roundConst = 1L << (shift - 1); + + EmitVectorUnaryNarrowOp32(context, (op1) => + { + if (op.Size <= 1) + { + op1 = context.Add(op1, Const(op1.Type, roundConst)); + op1 = signed ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift)); + } + else /* if (op.Size == 2 && round) */ + { + op1 = EmitShrImm64(context, op1, signed, roundConst, shift); // shift <= 32 + } + + return op1; + }, signed); + } + + private static Operand EmitShlRegOp(ArmEmitterContext context, Operand op, Operand shiftLsB, int size, bool unsigned) + { + if (shiftLsB.Type == OperandType.I64) + { + shiftLsB = context.ConvertI64ToI32(shiftLsB); + } + + shiftLsB = context.SignExtend8(OperandType.I32, shiftLsB); + Debug.Assert((uint)size < 4u); + + Operand negShiftLsB = context.Negate(shiftLsB); + + Operand isPositive = context.ICompareGreaterOrEqual(shiftLsB, Const(0)); + + Operand shl = context.ShiftLeft(op, shiftLsB); + Operand shr = unsigned ? 
context.ShiftRightUI(op, negShiftLsB) : context.ShiftRightSI(op, negShiftLsB); + + Operand res = context.ConditionalSelect(isPositive, shl, shr); + + if (unsigned) + { + Operand isOutOfRange = context.BitwiseOr( + context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)), + context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size))); + + return context.ConditionalSelect(isOutOfRange, Const(op.Type, 0), res); + } + else + { + Operand isOutOfRange0 = context.ICompareGreaterOrEqual(shiftLsB, Const(8 << size)); + Operand isOutOfRangeN = context.ICompareGreaterOrEqual(negShiftLsB, Const(8 << size)); + + // Also zero if shift is too negative, but value was positive. + isOutOfRange0 = context.BitwiseOr(isOutOfRange0, context.BitwiseAnd(isOutOfRangeN, context.ICompareGreaterOrEqual(op, Const(op.Type, 0)))); + + Operand min = (op.Type == OperandType.I64) ? Const(-1L) : Const(-1); + + return context.ConditionalSelect(isOutOfRange0, Const(op.Type, 0), context.ConditionalSelect(isOutOfRangeN, min, res)); + } + } + + [Flags] + private enum ShrImmSaturatingNarrowFlags + { + Scalar = 1 << 0, + SignedSrc = 1 << 1, + SignedDst = 1 << 2, + + Round = 1 << 3, + + ScalarSxSx = Scalar | SignedSrc | SignedDst, + ScalarSxZx = Scalar | SignedSrc, + ScalarZxZx = Scalar, + + VectorSxSx = SignedSrc | SignedDst, + VectorSxZx = SignedSrc, + VectorZxZx = 0 + } + + private static void EmitRoundShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + EmitShrImmSaturatingNarrowOp(context, ShrImmSaturatingNarrowFlags.Round | flags); + } + + private static void EmitShrImmSaturatingNarrowOp(ArmEmitterContext context, ShrImmSaturatingNarrowFlags flags) + { + OpCode32SimdShImm op = (OpCode32SimdShImm)context.CurrOp; + + bool scalar = (flags & ShrImmSaturatingNarrowFlags.Scalar) != 0; + bool signedSrc = (flags & ShrImmSaturatingNarrowFlags.SignedSrc) != 0; + bool signedDst = (flags & ShrImmSaturatingNarrowFlags.SignedDst) != 0; + bool round = (flags & ShrImmSaturatingNarrowFlags.Round) != 0; + + if (scalar) + { + // TODO: Support scalar operation. + throw new NotImplementedException(); + } + + int shift = GetImmShr(op); + long roundConst = 1L << (shift - 1); + + EmitVectorUnaryNarrowOp32(context, (op1) => + { + if (op.Size <= 1 || !round) + { + if (round) + { + op1 = context.Add(op1, Const(op1.Type, roundConst)); + } + + op1 = signedSrc ? context.ShiftRightSI(op1, Const(shift)) : context.ShiftRightUI(op1, Const(shift)); + } + else /* if (op.Size == 2 && round) */ + { + op1 = EmitShrImm64(context, op1, signedSrc, roundConst, shift); // shift <= 32 + } + + return EmitSatQ(context, op1, 8 << op.Size, signedSrc, signedDst); + }, signedSrc); + } + + private static int GetImmShr(OpCode32SimdShImm op) + { + return (8 << op.Size) - op.Shift; // Shr amount is flipped. + } + + // dst64 = (Int(src64, signed) + roundConst) >> shift; + private static Operand EmitShrImm64( + ArmEmitterContext context, + Operand value, + bool signed, + long roundConst, + int shift) + { + MethodInfo info = signed + ? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64)) + : typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64)); + + return context.Call(info, value, Const(roundConst), Const(shift)); + } + + private static Operand EmitSatQ(ArmEmitterContext context, Operand value, int eSize, bool signedSrc, bool signedDst) + { + Debug.Assert(eSize <= 32); + + long intMin = signedDst ? -(1L << (eSize - 1)) : 0; + long intMax = signedDst ? 
(1L << (eSize - 1)) - 1 : (1L << eSize) - 1; + + Operand gt = signedSrc + ? context.ICompareGreater(value, Const(value.Type, intMax)) + : context.ICompareGreaterUI(value, Const(value.Type, intMax)); + + Operand lt = signedSrc + ? context.ICompareLess(value, Const(value.Type, intMin)) + : context.ICompareLessUI(value, Const(value.Type, intMin)); + + value = context.ConditionalSelect(gt, Const(value.Type, intMax), value); + value = context.ConditionalSelect(lt, Const(value.Type, intMin), value); + + Operand lblNoSat = Label(); + + context.BranchIfFalse(lblNoSat, context.BitwiseOr(gt, lt)); + + SetFpFlag(context, FPState.QcFlag, Const(1)); + + context.MarkLabel(lblNoSat); + + return value; + } + } +} diff --git a/src/ARMeilleure/Instructions/InstEmitSystem.cs b/src/ARMeilleure/Instructions/InstEmitSystem.cs new file mode 100644 index 00000000..f84829aa --- /dev/null +++ b/src/ARMeilleure/Instructions/InstEmitSystem.cs @@ -0,0 +1,248 @@ +using ARMeilleure.Decoders; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Reflection; + +using static ARMeilleure.Instructions.InstEmitHelper; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Instructions +{ + static partial class InstEmit + { + private const int DczSizeLog2 = 4; // Log2 size in words + public const int DczSizeInBytes = 4 << DczSizeLog2; + + public static void Isb(ArmEmitterContext context) + { + // Execute as no-op. + } + + public static void Mrs(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + MethodInfo info; + + switch (GetPackedId(op)) + { + case 0b11_011_0000_0000_001: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCtrEl0)); break; + case 0b11_011_0000_0000_111: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetDczidEl0)); break; + case 0b11_011_0100_0010_000: EmitGetNzcv(context); return; + case 0b11_011_0100_0100_000: EmitGetFpcr(context); return; + case 0b11_011_0100_0100_001: EmitGetFpsr(context); return; + case 0b11_011_1101_0000_010: EmitGetTpidrEl0(context); return; + case 0b11_011_1101_0000_011: EmitGetTpidrroEl0(context); return; + case 0b11_011_1110_0000_000: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntfrqEl0)); break; + case 0b11_011_1110_0000_001: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)); break; + case 0b11_011_1110_0000_010: info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntvctEl0)); break; + + default: throw new NotImplementedException($"Unknown MRS 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + + SetIntOrZR(context, op.Rt, context.Call(info)); + } + + public static void Msr(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + switch (GetPackedId(op)) + { + case 0b11_011_0100_0010_000: EmitSetNzcv(context); return; + case 0b11_011_0100_0100_000: EmitSetFpcr(context); return; + case 0b11_011_0100_0100_001: EmitSetFpsr(context); return; + case 0b11_011_1101_0000_010: EmitSetTpidrEl0(context); return; + + default: throw new NotImplementedException($"Unknown MSR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}."); + } + } + + public static void Nop(ArmEmitterContext context) + { + // Do nothing. + } + + public static void Sys(ArmEmitterContext context) + { + // This instruction is used to do some operations on the CPU like cache invalidation, + // address translation and the like. 
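+            // Of these, only the cache maintenance ops (DC ZVA, DC CIVAC and IC IVAU)
+            // are matched below, by their packed system register id.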
+ // We treat it as no-op here since we don't have any cache being emulated anyway. + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + switch (GetPackedId(op)) + { + case 0b11_011_0111_0100_001: + { + // DC ZVA + Operand t = GetIntOrZR(context, op.Rt); + + for (long offset = 0; offset < DczSizeInBytes; offset += 8) + { + Operand address = context.Add(t, Const(offset)); + + InstEmitMemoryHelper.EmitStore(context, address, RegisterConsts.ZeroIndex, 3); + } + + break; + } + + // No-op + case 0b11_011_0111_1110_001: // DC CIVAC + break; + + case 0b11_011_0111_0101_001: // IC IVAU + Operand target = Register(op.Rt, RegisterType.Integer, OperandType.I64); + context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.InvalidateCacheLine)), target); + break; + } + } + + private static int GetPackedId(OpCodeSystem op) + { + int id; + + id = op.Op2 << 0; + id |= op.CRm << 3; + id |= op.CRn << 7; + id |= op.Op1 << 11; + id |= op.Op0 << 14; + + return id; + } + + private static void EmitGetNzcv(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + Operand nzcv = context.ShiftLeft(GetFlag(PState.VFlag), Const((int)PState.VFlag)); + nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.CFlag), Const((int)PState.CFlag))); + nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.ZFlag), Const((int)PState.ZFlag))); + nzcv = context.BitwiseOr(nzcv, context.ShiftLeft(GetFlag(PState.NFlag), Const((int)PState.NFlag))); + + SetIntOrZR(context, op.Rt, nzcv); + } + + private static void EmitGetFpcr(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + Operand fpcr = Const(0); + + for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++) + { + if (FPCR.Mask.HasFlag((FPCR)(1u << flag))) + { + fpcr = context.BitwiseOr(fpcr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag))); + } + } + + SetIntOrZR(context, op.Rt, fpcr); + } + + private static void EmitGetFpsr(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + context.SyncQcFlag(); + + Operand fpsr = Const(0); + + for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++) + { + if (FPSR.Mask.HasFlag((FPSR)(1u << flag))) + { + fpsr = context.BitwiseOr(fpsr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag))); + } + } + + SetIntOrZR(context, op.Rt, fpsr); + } + + private static void EmitGetTpidrEl0(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + + Operand result = context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset()))); + + SetIntOrZR(context, op.Rt, result); + } + + private static void EmitGetTpidrroEl0(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + + Operand result = context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrroEl0Offset()))); + + SetIntOrZR(context, op.Rt, result); + } + + private static void EmitSetNzcv(ArmEmitterContext context) + { + OpCodeSystem op = (OpCodeSystem)context.CurrOp; + + Operand nzcv = GetIntOrZR(context, op.Rt); + nzcv = context.ConvertI64ToI32(nzcv); + + SetFlag(context, PState.VFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.VFlag)), Const(1))); + SetFlag(context, PState.CFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.CFlag)), Const(1))); + 
            SetFlag(context, PState.ZFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.ZFlag)), Const(1)));
+            SetFlag(context, PState.NFlag, context.BitwiseAnd(context.ShiftRightUI(nzcv, Const((int)PState.NFlag)), Const(1)));
+        }
+
+        private static void EmitSetFpcr(ArmEmitterContext context)
+        {
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+            Operand fpcr = GetIntOrZR(context, op.Rt);
+            fpcr = context.ConvertI64ToI32(fpcr);
+
+            for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+            {
+                if (FPCR.Mask.HasFlag((FPCR)(1u << flag)))
+                {
+                    SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpcr, Const(flag)), Const(1)));
+                }
+            }
+
+            context.UpdateArmFpMode();
+        }
+
+        private static void EmitSetFpsr(ArmEmitterContext context)
+        {
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+            context.ClearQcFlagIfModified();
+
+            Operand fpsr = GetIntOrZR(context, op.Rt);
+            fpsr = context.ConvertI64ToI32(fpsr);
+
+            for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++)
+            {
+                if (FPSR.Mask.HasFlag((FPSR)(1u << flag)))
+                {
+                    SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpsr, Const(flag)), Const(1)));
+                }
+            }
+
+            context.UpdateArmFpMode();
+        }
+
+        private static void EmitSetTpidrEl0(ArmEmitterContext context)
+        {
+            OpCodeSystem op = (OpCodeSystem)context.CurrOp;
+
+            Operand value = GetIntOrZR(context, op.Rt);
+
+            Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+
+            context.Store(context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())), value);
+        }
+    }
+}
diff --git a/src/ARMeilleure/Instructions/InstEmitSystem32.cs b/src/ARMeilleure/Instructions/InstEmitSystem32.cs
new file mode 100644
index 00000000..f2732c99
--- /dev/null
+++ b/src/ARMeilleure/Instructions/InstEmitSystem32.cs
@@ -0,0 +1,351 @@
+using ARMeilleure.Decoders;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.State;
+using ARMeilleure.Translation;
+using System;
+using System.Reflection;
+
+using static ARMeilleure.Instructions.InstEmitHelper;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Instructions
+{
+    static partial class InstEmit32
+    {
+        public static void Mcr(ArmEmitterContext context)
+        {
+            OpCode32System op = (OpCode32System)context.CurrOp;
+
+            if (op.Coproc != 15 || op.Opc1 != 0)
+            {
+                InstEmit.Und(context);
+
+                return;
+            }
+
+            switch (op.CRn)
+            {
+                case 13: // Process and Thread Info.
+                    if (op.CRm != 0)
+                    {
+                        throw new NotImplementedException($"Unknown MCR CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+                    }
+
+                    switch (op.Opc2)
+                    {
+                        case 2:
+                            EmitSetTpidrEl0(context); return;
+
+                        default:
+                            throw new NotImplementedException($"Unknown MCR Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+                    }
+
+                case 7:
+                    switch (op.CRm) // Cache and Memory barrier.
+                    {
+                        case 10:
+                            switch (op.Opc2)
+                            {
+                                case 5: // Data Memory Barrier Register.
+                                    return; // No-op.
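+                                    // (For reference: MCR p15, 0, Rt, c7, c10, 5 is the
+                                    // deprecated CP15 encoding of DMB; no host barrier is
+                                    // emitted for it here.)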
+
+                                default:
+                                    throw new NotImplementedException($"Unknown MCR Opc2 0x{op.Opc2:X} at 0x{op.Address:X16} (0x{op.RawOpCode:X}).");
+                            }
+
+                        default:
+                            throw new NotImplementedException($"Unknown MCR CRm 0x{op.CRm:X} at 0x{op.Address:X16} (0x{op.RawOpCode:X}).");
+                    }
+
+                default:
+                    throw new NotImplementedException($"Unknown MCR 0x{op.RawOpCode:X8} at 0x{op.Address:X16}.");
+            }
+        }
+
+        public static void Mrc(ArmEmitterContext context)
+        {
+            OpCode32System op = (OpCode32System)context.CurrOp;
+
+            if (op.Coproc != 15 || op.Opc1 != 0)
+            {
+                InstEmit.Und(context);
+
+                return;
+            }
+
+            Operand result;
+
+            switch (op.CRn)
+            {
+                case 13: // Process and Thread Info.
+                    if (op.CRm != 0)
+                    {
+                        throw new NotImplementedException($"Unknown MRC CRm 0x{op.CRm:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+                    }
+
+                    switch (op.Opc2)
+                    {
+                        case 2:
+                            result = EmitGetTpidrEl0(context); break;
+
+                        case 3:
+                            result = EmitGetTpidrroEl0(context); break;
+
+                        default:
+                            throw new NotImplementedException($"Unknown MRC Opc2 0x{op.Opc2:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+                    }
+
+                    break;
+
+                default:
+                    throw new NotImplementedException($"Unknown MRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+            }
+
+            if (op.Rt == RegisterAlias.Aarch32Pc)
+            {
+                // Special behavior: copy NZCV flags into APSR.
+                EmitSetNzcv(context, result);
+
+                return;
+            }
+            else
+            {
+                SetIntA32(context, op.Rt, result);
+            }
+        }
+
+        public static void Mrrc(ArmEmitterContext context)
+        {
+            OpCode32System op = (OpCode32System)context.CurrOp;
+
+            if (op.Coproc != 15)
+            {
+                InstEmit.Und(context);
+
+                return;
+            }
+
+            int opc = op.MrrcOp;
+
+            MethodInfo info;
+
+            switch (op.CRm)
+            {
+                case 14: // Timer.
+                    switch (opc)
+                    {
+                        case 0:
+                            info = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0)); break;
+
+                        default:
+                            throw new NotImplementedException($"Unknown MRRC Opc1 0x{opc:X} at 0x{op.Address:X} (0x{op.RawOpCode:X}).");
+                    }
+
+                    break;
+
+                default:
+                    throw new NotImplementedException($"Unknown MRRC 0x{op.RawOpCode:X} at 0x{op.Address:X}.");
+            }
+
+            Operand result = context.Call(info);
+
+            SetIntA32(context, op.Rt, context.ConvertI64ToI32(result));
+            SetIntA32(context, op.CRn, context.ConvertI64ToI32(context.ShiftRightUI(result, Const(32))));
+        }
+
+        public static void Mrs(ArmEmitterContext context)
+        {
+            OpCode32Mrs op = (OpCode32Mrs)context.CurrOp;
+
+            if (op.R)
+            {
+                throw new NotImplementedException("SPSR");
+            }
+            else
+            {
+                Operand spsr = context.ShiftLeft(GetFlag(PState.VFlag), Const((int)PState.VFlag));
+                spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.CFlag), Const((int)PState.CFlag)));
+                spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.ZFlag), Const((int)PState.ZFlag)));
+                spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.NFlag), Const((int)PState.NFlag)));
+                spsr = context.BitwiseOr(spsr, context.ShiftLeft(GetFlag(PState.QFlag), Const((int)PState.QFlag)));
+
+                // TODO: Remaining flags.
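+                // (Not yet copied: e.g. the GE[3:0] bits and the mode/mask fields
+                // that a full CPSR read would also return.)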
+ + SetIntA32(context, op.Rd, spsr); + } + } + + public static void Msr(ArmEmitterContext context) + { + OpCode32MsrReg op = (OpCode32MsrReg)context.CurrOp; + + if (op.R) + { + throw new NotImplementedException("SPSR"); + } + else + { + if ((op.Mask & 8) != 0) + { + Operand value = GetIntA32(context, op.Rn); + + EmitSetNzcv(context, value); + + Operand q = context.BitwiseAnd(context.ShiftRightUI(value, Const((int)PState.QFlag)), Const(1)); + + SetFlag(context, PState.QFlag, q); + } + + if ((op.Mask & 4) != 0) + { + throw new NotImplementedException("APSR_g"); + } + + if ((op.Mask & 2) != 0) + { + throw new NotImplementedException("CPSR_x"); + } + + if ((op.Mask & 1) != 0) + { + throw new NotImplementedException("CPSR_c"); + } + } + } + + public static void Nop(ArmEmitterContext context) { } + + public static void Vmrs(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + if (op.Rt == RegisterAlias.Aarch32Pc && op.Sreg == 0b0001) + { + // Special behavior: copy NZCV flags into APSR. + SetFlag(context, PState.VFlag, GetFpFlag(FPState.VFlag)); + SetFlag(context, PState.CFlag, GetFpFlag(FPState.CFlag)); + SetFlag(context, PState.ZFlag, GetFpFlag(FPState.ZFlag)); + SetFlag(context, PState.NFlag, GetFpFlag(FPState.NFlag)); + + return; + } + + switch (op.Sreg) + { + case 0b0000: // FPSID + throw new NotImplementedException("Supervisor Only"); + case 0b0001: // FPSCR + EmitGetFpscr(context); return; + case 0b0101: // MVFR2 + throw new NotImplementedException("MVFR2"); + case 0b0110: // MVFR1 + throw new NotImplementedException("MVFR1"); + case 0b0111: // MVFR0 + throw new NotImplementedException("MVFR0"); + case 0b1000: // FPEXC + throw new NotImplementedException("Supervisor Only"); + default: + throw new NotImplementedException($"Unknown VMRS 0x{op.RawOpCode:X} at 0x{op.Address:X}."); + } + } + + public static void Vmsr(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + switch (op.Sreg) + { + case 0b0000: // FPSID + throw new NotImplementedException("Supervisor Only"); + case 0b0001: // FPSCR + EmitSetFpscr(context); return; + case 0b0101: // MVFR2 + throw new NotImplementedException("MVFR2"); + case 0b0110: // MVFR1 + throw new NotImplementedException("MVFR1"); + case 0b0111: // MVFR0 + throw new NotImplementedException("MVFR0"); + case 0b1000: // FPEXC + throw new NotImplementedException("Supervisor Only"); + default: + throw new NotImplementedException($"Unknown VMSR 0x{op.RawOpCode:X} at 0x{op.Address:X}."); + } + } + + private static void EmitSetNzcv(ArmEmitterContext context, Operand t) + { + Operand v = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.VFlag)), Const(1)); + Operand c = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.CFlag)), Const(1)); + Operand z = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.ZFlag)), Const(1)); + Operand n = context.BitwiseAnd(context.ShiftRightUI(t, Const((int)PState.NFlag)), Const(1)); + + SetFlag(context, PState.VFlag, v); + SetFlag(context, PState.CFlag, c); + SetFlag(context, PState.ZFlag, z); + SetFlag(context, PState.NFlag, n); + } + + private static void EmitGetFpscr(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + Operand fpscr = Const(0); + + for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++) + { + if (FPSCR.Mask.HasFlag((FPSCR)(1u << flag))) + { + fpscr = context.BitwiseOr(fpscr, context.ShiftLeft(GetFpFlag((FPState)flag), Const(flag))); + } + } + + 
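// fpscr now holds every FPSCR bit admitted by FPSCR.Mask, rebuilt one flag at a time from the emulated per-flag state. +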
SetIntA32(context, op.Rt, fpscr); + } + + private static void EmitSetFpscr(ArmEmitterContext context) + { + OpCode32SimdSpecial op = (OpCode32SimdSpecial)context.CurrOp; + + Operand fpscr = GetIntA32(context, op.Rt); + + for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++) + { + if (FPSCR.Mask.HasFlag((FPSCR)(1u << flag))) + { + SetFpFlag(context, (FPState)flag, context.BitwiseAnd(context.ShiftRightUI(fpscr, Const(flag)), Const(1))); + } + } + + context.UpdateArmFpMode(); + } + + private static Operand EmitGetTpidrEl0(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + + return context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset()))); + } + + private static Operand EmitGetTpidrroEl0(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + + return context.Load(OperandType.I64, context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrroEl0Offset()))); + } + + private static void EmitSetTpidrEl0(ArmEmitterContext context) + { + OpCode32System op = (OpCode32System)context.CurrOp; + + Operand value = GetIntA32(context, op.Rt); + + Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + + context.Store(context.Add(nativeContext, Const((ulong)NativeContext.GetTpidrEl0Offset())), context.ZeroExtend32(OperandType.I64, value)); + } + } +} diff --git a/src/ARMeilleure/Instructions/InstName.cs b/src/ARMeilleure/Instructions/InstName.cs new file mode 100644 index 00000000..fd71d92e --- /dev/null +++ b/src/ARMeilleure/Instructions/InstName.cs @@ -0,0 +1,685 @@ +namespace ARMeilleure.Instructions +{ + enum InstName + { + // Base (AArch64) + Adc, + Adcs, + Add, + Adds, + Adr, + Adrp, + And, + Ands, + Asrv, + B, + B_Cond, + Bfm, + Bic, + Bics, + Bl, + Blr, + Br, + Brk, + Cbnz, + Cbz, + Ccmn, + Ccmp, + Clrex, + Cls, + Clz, + Crc32b, + Crc32h, + Crc32w, + Crc32x, + Crc32cb, + Crc32ch, + Crc32cw, + Crc32cx, + Csdb, + Csel, + Csinc, + Csinv, + Csneg, + Dmb, + Dsb, + Eon, + Eor, + Esb, + Extr, + Hint, + Isb, + It, + Ldar, + Ldaxp, + Ldaxr, + Ldp, + Ldr, + Ldr_Literal, + Ldrs, + Ldxr, + Ldxp, + Lslv, + Lsrv, + Madd, + Movk, + Movn, + Movz, + Mrs, + Msr, + Msub, + Nop, + Orn, + Orr, + Prfm, + Rbit, + Ret, + Rev16, + Rev32, + Rev64, + Rorv, + Sbc, + Sbcs, + Sbfm, + Sdiv, + Sel, + Sev, + Sevl, + Shsub8, + Smaddl, + Smsubl, + Smulh, + Smull, + Smulw_, + Ssat, + Ssat16, + Stlr, + Stlxp, + Stlxr, + Stp, + Str, + Stxp, + Stxr, + Sub, + Subs, + Svc, + Sxtb, + Sxth, + Sys, + Tbnz, + Tbz, + Tsb, + Ubfm, + Udiv, + Umaddl, + Umsubl, + Umulh, + Und, + Wfe, + Wfi, + Yield, + + // FP & SIMD (AArch64) + Abs_S, + Abs_V, + Add_S, + Add_V, + Addhn_V, + Addp_S, + Addp_V, + Addv_V, + Aesd_V, + Aese_V, + Aesimc_V, + Aesmc_V, + And_V, + Bic_V, + Bic_Vi, + Bif_V, + Bit_V, + Bsl_V, + Cls_V, + Clz_V, + Cmeq_S, + Cmeq_V, + Cmge_S, + Cmge_V, + Cmgt_S, + Cmgt_V, + Cmhi_S, + Cmhi_V, + Cmhs_S, + Cmhs_V, + Cmle_S, + Cmle_V, + Cmlt_S, + Cmlt_V, + Cmtst_S, + Cmtst_V, + Cnt_V, + Dup_Gp, + Dup_S, + Dup_V, + Eor_V, + Ext_V, + Fabd_S, + Fabd_V, + Fabs_S, + Fabs_V, + Facge_S, + Facge_V, + Facgt_S, + Facgt_V, + Fadd_S, + Fadd_V, + Faddp_S, + Faddp_V, + Fccmp_S, + Fccmpe_S, + Fcmeq_S, + Fcmeq_V, + Fcmge_S, + Fcmge_V, + Fcmgt_S, + Fcmgt_V, + Fcmle_S, + Fcmle_V, + Fcmlt_S, + Fcmlt_V, + Fcmp_S, + Fcmpe_S, + Fcsel_S, + Fcvt_S, + Fcvtas_Gp, + Fcvtas_S, + Fcvtas_V, + Fcvtau_Gp, + 
Fcvtau_S, + Fcvtau_V, + Fcvtl_V, + Fcvtms_Gp, + Fcvtms_V, + Fcvtmu_Gp, + Fcvtn_V, + Fcvtns_Gp, + Fcvtns_S, + Fcvtns_V, + Fcvtnu_S, + Fcvtnu_V, + Fcvtps_Gp, + Fcvtpu_Gp, + Fcvtzs_Gp, + Fcvtzs_Gp_Fixed, + Fcvtzs_S, + Fcvtzs_V, + Fcvtzs_V_Fixed, + Fcvtzu_Gp, + Fcvtzu_Gp_Fixed, + Fcvtzu_S, + Fcvtzu_V, + Fcvtzu_V_Fixed, + Fdiv_S, + Fdiv_V, + Fmadd_S, + Fmax_S, + Fmax_V, + Fmaxnm_S, + Fmaxnm_V, + Fmaxnmp_S, + Fmaxnmp_V, + Fmaxnmv_V, + Fmaxp_V, + Fmaxv_V, + Fmin_S, + Fmin_V, + Fminnm_S, + Fminnm_V, + Fminnmp_S, + Fminnmp_V, + Fminnmv_V, + Fminp_V, + Fminv_V, + Fmla_Se, + Fmla_V, + Fmla_Ve, + Fmls_Se, + Fmls_V, + Fmls_Ve, + Fmov_S, + Fmov_Si, + Fmov_Vi, + Fmov_Ftoi, + Fmov_Itof, + Fmov_Ftoi1, + Fmov_Itof1, + Fmsub_S, + Fmul_S, + Fmul_Se, + Fmul_V, + Fmul_Ve, + Fmulx_S, + Fmulx_Se, + Fmulx_V, + Fmulx_Ve, + Fneg_S, + Fneg_V, + Fnmadd_S, + Fnmsub_S, + Fnmul_S, + Frecpe_S, + Frecpe_V, + Frecps_S, + Frecps_V, + Frecpx_S, + Frinta_S, + Frinta_V, + Frinti_S, + Frinti_V, + Frintm_S, + Frintm_V, + Frintn_S, + Frintn_V, + Frintp_S, + Frintp_V, + Frintx_S, + Frintx_V, + Frintz_S, + Frintz_V, + Frsqrte_S, + Frsqrte_V, + Frsqrts_S, + Frsqrts_V, + Fsqrt_S, + Fsqrt_V, + Fsub_S, + Fsub_V, + Ins_Gp, + Ins_V, + Ld__Vms, + Ld__Vss, + Mla_V, + Mla_Ve, + Mls_V, + Mls_Ve, + Movi_V, + Mul_V, + Mul_Ve, + Mvni_V, + Neg_S, + Neg_V, + Not_V, + Orn_V, + Orr_V, + Orr_Vi, + Pmull_V, + Raddhn_V, + Rbit_V, + Rev16_V, + Rev32_V, + Rev64_V, + Rshrn_V, + Rsubhn_V, + Saba_V, + Sabal_V, + Sabd_V, + Sabdl_V, + Sadalp_V, + Saddl_V, + Saddlp_V, + Saddlv_V, + Saddw_V, + Scvtf_Gp, + Scvtf_Gp_Fixed, + Scvtf_S, + Scvtf_S_Fixed, + Scvtf_V, + Scvtf_V_Fixed, + Sha1c_V, + Sha1h_V, + Sha1m_V, + Sha1p_V, + Sha1su0_V, + Sha1su1_V, + Sha256h_V, + Sha256h2_V, + Sha256su0_V, + Sha256su1_V, + Shadd_V, + Shl_S, + Shl_V, + Shll_V, + Shrn_V, + Shsub_V, + Sli_S, + Sli_V, + Smax_V, + Smaxp_V, + Smaxv_V, + Smin_V, + Sminp_V, + Sminv_V, + Smlal_V, + Smlal_Ve, + Smlsl_V, + Smlsl_Ve, + Smov_S, + Smull_V, + Smull_Ve, + Sqabs_S, + Sqabs_V, + Sqadd_S, + Sqadd_V, + Sqdmulh_S, + Sqdmulh_V, + Sqdmulh_Ve, + Sqneg_S, + Sqneg_V, + Sqrdmulh_S, + Sqrdmulh_V, + Sqrdmulh_Ve, + Sqrshl_V, + Sqrshrn_S, + Sqrshrn_V, + Sqrshrun_S, + Sqrshrun_V, + Sqshl_V, + Sqshrn_S, + Sqshrn_V, + Sqshrun_S, + Sqshrun_V, + Sqsub_S, + Sqsub_V, + Sqxtn_S, + Sqxtn_V, + Sqxtun_S, + Sqxtun_V, + Srhadd_V, + Sri_S, + Sri_V, + Srshl_V, + Srshr_S, + Srshr_V, + Srsra_S, + Srsra_V, + Sshl_S, + Sshl_V, + Sshll_V, + Sshr_S, + Sshr_V, + Ssra_S, + Ssra_V, + Ssubl_V, + Ssubw_V, + St__Vms, + St__Vss, + Sub_S, + Sub_V, + Subhn_V, + Suqadd_S, + Suqadd_V, + Tbl_V, + Tbx_V, + Trn1_V, + Trn2_V, + Uaba_V, + Uabal_V, + Uabd_V, + Uabdl_V, + Uadalp_V, + Uaddl_V, + Uaddlp_V, + Uaddlv_V, + Uaddw_V, + Ucvtf_Gp, + Ucvtf_Gp_Fixed, + Ucvtf_S, + Ucvtf_S_Fixed, + Ucvtf_V, + Ucvtf_V_Fixed, + Uhadd_V, + Uhsub_V, + Umax_V, + Umaxp_V, + Umaxv_V, + Umin_V, + Uminp_V, + Uminv_V, + Umlal_V, + Umlal_Ve, + Umlsl_V, + Umlsl_Ve, + Umov_S, + Umull_V, + Umull_Ve, + Uqadd_S, + Uqadd_V, + Uqrshl_V, + Uqrshrn_S, + Uqrshrn_V, + Uqshl_V, + Uqshrn_S, + Uqshrn_V, + Uqsub_S, + Uqsub_V, + Uqxtn_S, + Uqxtn_V, + Urhadd_V, + Urshl_V, + Urshr_S, + Urshr_V, + Ursra_S, + Ursra_V, + Ushl_S, + Ushl_V, + Ushll_V, + Ushr_S, + Ushr_V, + Usqadd_S, + Usqadd_V, + Usra_S, + Usra_V, + Usubl_V, + Usubw_V, + Uzp1_V, + Uzp2_V, + Xtn_V, + Zip1_V, + Zip2_V, + + // Base (AArch32) + Bfc, + Bfi, + Blx, + Bx, + Cmp, + Cmn, + Movt, + Mul, + Lda, + Ldab, + Ldaex, + Ldaexb, + Ldaexd, + Ldaexh, + Ldah, + Ldm, + Ldrb, + Ldrd, + Ldrex, + Ldrexb, + Ldrexd, + Ldrexh, + Ldrh, + 
Ldrsb, + Ldrsh, + Mcr, + Mla, + Mls, + Mov, + Mrc, + Mrrc, + Mvn, + Pkh, + Pld, + Pop, + Push, + Rev, + Revsh, + Rsb, + Rsc, + Sadd8, + Sbfx, + Shadd8, + Smla__, + Smlal, + Smlal__, + Smlaw_, + Smmla, + Smmls, + Smul__, + Smmul, + Ssub8, + Stl, + Stlb, + Stlex, + Stlexb, + Stlexd, + Stlexh, + Stlh, + Stm, + Strb, + Strd, + Strex, + Strexb, + Strexd, + Strexh, + Strh, + Sxtb16, + Tbb, + Tbh, + Teq, + Trap, + Tst, + Uadd8, + Ubfx, + Uhadd8, + Uhsub8, + Umaal, + Umlal, + Umull, + Usat, + Usat16, + Usub8, + Uxtb, + Uxtb16, + Uxth, + + // FP & SIMD (AArch32) + Vabd, + Vabdl, + Vabs, + Vadd, + Vaddl, + Vaddw, + Vand, + Vbic, + Vbif, + Vbit, + Vbsl, + Vceq, + Vcge, + Vcgt, + Vcle, + Vclt, + Vcmp, + Vcmpe, + Vcnt, + Vcvt, + Vdiv, + Vdup, + Veor, + Vext, + Vfma, + Vfms, + Vfnma, + Vfnms, + Vhadd, + Vld1, + Vld2, + Vld3, + Vld4, + Vldm, + Vldr, + Vmax, + Vmaxnm, + Vmin, + Vminnm, + Vmla, + Vmlal, + Vmls, + Vmlsl, + Vmov, + Vmovl, + Vmovn, + Vmrs, + Vmsr, + Vmul, + Vmull, + Vmvn, + Vneg, + Vnmul, + Vnmla, + Vnmls, + Vorn, + Vorr, + Vpadd, + Vpaddl, + Vpmax, + Vpmin, + Vqadd, + Vqdmulh, + Vqmovn, + Vqmovun, + Vqrshrn, + Vqrshrun, + Vqshrn, + Vqshrun, + Vqsub, + Vrev, + Vrhadd, + Vrint, + Vrinta, + Vrintm, + Vrintn, + Vrintp, + Vrintx, + Vrshr, + Vrshrn, + Vsel, + Vshl, + Vshll, + Vshr, + Vshrn, + Vst1, + Vst2, + Vst3, + Vst4, + Vstm, + Vstr, + Vsqrt, + Vrecpe, + Vrecps, + Vrsqrte, + Vrsqrts, + Vrsra, + Vsra, + Vsub, + Vsubl, + Vsubw, + Vtbl, + Vtrn, + Vtst, + Vuzp, + Vzip, + } +} diff --git a/src/ARMeilleure/Instructions/NativeInterface.cs b/src/ARMeilleure/Instructions/NativeInterface.cs new file mode 100644 index 00000000..2c35387a --- /dev/null +++ b/src/ARMeilleure/Instructions/NativeInterface.cs @@ -0,0 +1,195 @@ +using ARMeilleure.Memory; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; + +namespace ARMeilleure.Instructions +{ + static class NativeInterface + { + private class ThreadContext + { + public ExecutionContext Context { get; } + public IMemoryManager Memory { get; } + public Translator Translator { get; } + + public ThreadContext(ExecutionContext context, IMemoryManager memory, Translator translator) + { + Context = context; + Memory = memory; + Translator = translator; + } + } + + [ThreadStatic] + private static ThreadContext Context; + + public static void RegisterThread(ExecutionContext context, IMemoryManager memory, Translator translator) + { + Context = new ThreadContext(context, memory, translator); + } + + public static void UnregisterThread() + { + Context = null; + } + + public static void Break(ulong address, int imm) + { + Statistics.PauseTimer(); + + GetContext().OnBreak(address, imm); + + Statistics.ResumeTimer(); + } + + public static void SupervisorCall(ulong address, int imm) + { + Statistics.PauseTimer(); + + GetContext().OnSupervisorCall(address, imm); + + Statistics.ResumeTimer(); + } + + public static void Undefined(ulong address, int opCode) + { + Statistics.PauseTimer(); + + GetContext().OnUndefined(address, opCode); + + Statistics.ResumeTimer(); + } + + #region "System registers" + public static ulong GetCtrEl0() + { + return (ulong)GetContext().CtrEl0; + } + + public static ulong GetDczidEl0() + { + return (ulong)GetContext().DczidEl0; + } + + public static ulong GetCntfrqEl0() + { + return GetContext().CntfrqEl0; + } + + public static ulong GetCntpctEl0() + { + return GetContext().CntpctEl0; + } + + public static ulong GetCntvctEl0() + { + return GetContext().CntvctEl0; + } + #endregion + + #region "Read" + public static byte 
ReadByte(ulong address) + { + return GetMemoryManager().ReadTracked<byte>(address); + } + + public static ushort ReadUInt16(ulong address) + { + return GetMemoryManager().ReadTracked<ushort>(address); + } + + public static uint ReadUInt32(ulong address) + { + return GetMemoryManager().ReadTracked<uint>(address); + } + + public static ulong ReadUInt64(ulong address) + { + return GetMemoryManager().ReadTracked<ulong>(address); + } + + public static V128 ReadVector128(ulong address) + { + return GetMemoryManager().ReadTracked<V128>(address); + } + #endregion + + #region "Write" + public static void WriteByte(ulong address, byte value) + { + GetMemoryManager().Write(address, value); + } + + public static void WriteUInt16(ulong address, ushort value) + { + GetMemoryManager().Write(address, value); + } + + public static void WriteUInt32(ulong address, uint value) + { + GetMemoryManager().Write(address, value); + } + + public static void WriteUInt64(ulong address, ulong value) + { + GetMemoryManager().Write(address, value); + } + + public static void WriteVector128(ulong address, V128 value) + { + GetMemoryManager().Write(address, value); + } + #endregion + + public static void EnqueueForRejit(ulong address) + { + Context.Translator.EnqueueForRejit(address, GetContext().ExecutionMode); + } + + public static void SignalMemoryTracking(ulong address, ulong size, bool write) + { + GetMemoryManager().SignalMemoryTracking(address, size, write); + } + + public static void ThrowInvalidMemoryAccess(ulong address) + { + throw new InvalidAccessException(address); + } + + public static ulong GetFunctionAddress(ulong address) + { + TranslatedFunction function = Context.Translator.GetOrTranslate(address, GetContext().ExecutionMode); + + return (ulong)function.FuncPointer.ToInt64(); + } + + public static void InvalidateCacheLine(ulong address) + { + Context.Translator.InvalidateJitCacheRegion(address, InstEmit.DczSizeInBytes); + } + + public static bool CheckSynchronization() + { + Statistics.PauseTimer(); + + ExecutionContext context = GetContext(); + + context.CheckInterrupt(); + + Statistics.ResumeTimer(); + + return context.Running; + } + + public static ExecutionContext GetContext() + { + return Context.Context; + } + + public static IMemoryManager GetMemoryManager() + { + return Context.Memory; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Instructions/SoftFallback.cs b/src/ARMeilleure/Instructions/SoftFallback.cs new file mode 100644 index 00000000..06d76a67 --- /dev/null +++ b/src/ARMeilleure/Instructions/SoftFallback.cs @@ -0,0 +1,624 @@ +using ARMeilleure.State; +using System; + +namespace ARMeilleure.Instructions +{ + static class SoftFallback + { +#region "ShrImm64" + public static long SignedShrImm64(long value, long roundConst, int shift) + { + if (roundConst == 0L) + { + if (shift <= 63) + { + return value >> shift; + } + else /* if (shift == 64) */ + { + if (value < 0L) + { + return -1L; + } + else /* if (value >= 0L) */ + { + return 0L; + } + } + } + else /* if (roundConst == 1L << (shift - 1)) */ + { + if (shift <= 63) + { + long add = value + roundConst; + + if ((~value & (value ^ add)) < 0L) + { + return (long)((ulong)add >> shift); + } + else + { + return add >> shift; + } + } + else /* if (shift == 64) */ + { + return 0L; + } + } + } + + public static ulong UnsignedShrImm64(ulong value, long roundConst, int shift) + { + if (roundConst == 0L) + { + if (shift <= 63) + { + return value >> shift; + } + else /* if (shift == 64) */ + { + return 0UL; + } + } + else /* if (roundConst == 1L << (shift - 1)) */ + { + ulong add = value + (ulong)roundConst; + + if ((add < value) && (add < (ulong)roundConst)) + { + if (shift <= 63) + { + return (add >> shift) | (0x8000000000000000UL >> (shift - 1)); + } + else /* if (shift == 64) */ + { + return 1UL; + } + } + else + { + if (shift <= 63) + { + return add >> shift; + } + else /* if (shift == 64) */ + { + return 0UL; + } + } + } + } +#endregion + +#region "Saturation" + public static int SatF32ToS32(float value) + { + if (float.IsNaN(value)) return 0; + + return value >= int.MaxValue ? int.MaxValue : + value <= int.MinValue ? int.MinValue : (int)value; + } + + public static long SatF32ToS64(float value) + { + if (float.IsNaN(value)) return 0; + + return value >= long.MaxValue ? long.MaxValue : + value <= long.MinValue ? long.MinValue : (long)value; + } + + public static uint SatF32ToU32(float value) + { + if (float.IsNaN(value)) return 0; + + return value >= uint.MaxValue ? uint.MaxValue : + value <= uint.MinValue ? uint.MinValue : (uint)value; + } + + public static ulong SatF32ToU64(float value) + { + if (float.IsNaN(value)) return 0; + + return value >= ulong.MaxValue ? ulong.MaxValue : + value <= ulong.MinValue ? ulong.MinValue : (ulong)value; + } + + public static int SatF64ToS32(double value) + { + if (double.IsNaN(value)) return 0; + + return value >= int.MaxValue ? int.MaxValue : + value <= int.MinValue ? int.MinValue : (int)value; + } + + public static long SatF64ToS64(double value) + { + if (double.IsNaN(value)) return 0; + + return value >= long.MaxValue ? long.MaxValue : + value <= long.MinValue ? long.MinValue : (long)value; + } + + public static uint SatF64ToU32(double value) + { + if (double.IsNaN(value)) return 0; + + return value >= uint.MaxValue ? uint.MaxValue : + value <= uint.MinValue ? uint.MinValue : (uint)value; + } + + public static ulong SatF64ToU64(double value) + { + if (double.IsNaN(value)) return 0; + + return value >= ulong.MaxValue ? ulong.MaxValue : + value <= ulong.MinValue ? ulong.MinValue : (ulong)value; + } +#endregion + +#region "Count" + public static ulong CountLeadingSigns(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). 
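+        // A worked example, for illustration: CountLeadingSigns(0xFFFFFFF0UL, 32) == 27,
+        // since the 27 bits directly below the sign bit share its value. The xor below
+        // marks where the sign run ends, reducing this to a highest-set-bit search.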
+ { + value ^= value >> 1; + + int highBit = size - 2; + + for (int bit = highBit; bit >= 0; bit--) + { + if (((int)(value >> bit) & 0b1) != 0) + { + return (ulong)(highBit - bit); + } + } + + return (ulong)(size - 1); + } + + private static ReadOnlySpan<byte> ClzNibbleTbl => new byte[] { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; + + public static ulong CountLeadingZeros(ulong value, int size) // size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). + { + if (value == 0ul) + { + return (ulong)size; + } + + int nibbleIdx = size; + int preCount, count = 0; + + do + { + nibbleIdx -= 4; + preCount = ClzNibbleTbl[(int)(value >> nibbleIdx) & 0b1111]; + count += preCount; + } + while (preCount == 4); + + return (ulong)count; + } +#endregion + +#region "Table" + public static V128 Tbl1(V128 vector, int bytes, V128 tb0) + { + return TblOrTbx(default, vector, bytes, tb0); + } + + public static V128 Tbl2(V128 vector, int bytes, V128 tb0, V128 tb1) + { + return TblOrTbx(default, vector, bytes, tb0, tb1); + } + + public static V128 Tbl3(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2) + { + return TblOrTbx(default, vector, bytes, tb0, tb1, tb2); + } + + public static V128 Tbl4(V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3) + { + return TblOrTbx(default, vector, bytes, tb0, tb1, tb2, tb3); + } + + public static V128 Tbx1(V128 dest, V128 vector, int bytes, V128 tb0) + { + return TblOrTbx(dest, vector, bytes, tb0); + } + + public static V128 Tbx2(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1) + { + return TblOrTbx(dest, vector, bytes, tb0, tb1); + } + + public static V128 Tbx3(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2) + { + return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2); + } + + public static V128 Tbx4(V128 dest, V128 vector, int bytes, V128 tb0, V128 tb1, V128 tb2, V128 tb3) + { + return TblOrTbx(dest, vector, bytes, tb0, tb1, tb2, tb3); + } + + private static V128 TblOrTbx(V128 dest, V128 vector, int bytes, params V128[] tb) + { + byte[] res = new byte[16]; + + if (dest != default) + { + Buffer.BlockCopy(dest.ToArray(), 0, res, 0, bytes); + } + + byte[] table = new byte[tb.Length * 16]; + + for (byte index = 0; index < tb.Length; index++) + { + Buffer.BlockCopy(tb[index].ToArray(), 0, table, index * 16, 16); + } + + byte[] v = vector.ToArray(); + + for (byte index = 0; index < bytes; index++) + { + byte tblIndex = v[index]; + + if (tblIndex < table.Length) + { + res[index] = table[tblIndex]; + } + } + + return new V128(res); + } +#endregion + +#region "Crc32" + private const uint Crc32RevPoly = 0xedb88320; + private const uint Crc32cRevPoly = 0x82f63b78; + + public static uint Crc32b(uint crc, byte value) => Crc32 (crc, Crc32RevPoly, value); + public static uint Crc32h(uint crc, ushort value) => Crc32h(crc, Crc32RevPoly, value); + public static uint Crc32w(uint crc, uint value) => Crc32w(crc, Crc32RevPoly, value); + public static uint Crc32x(uint crc, ulong value) => Crc32x(crc, Crc32RevPoly, value); + + public static uint Crc32cb(uint crc, byte value) => Crc32 (crc, Crc32cRevPoly, value); + public static uint Crc32ch(uint crc, ushort value) => Crc32h(crc, Crc32cRevPoly, value); + public static uint Crc32cw(uint crc, uint value) => Crc32w(crc, Crc32cRevPoly, value); + public static uint Crc32cx(uint crc, ulong value) => Crc32x(crc, Crc32cRevPoly, value); + + private static uint Crc32h(uint crc, uint poly, ushort val) + { + crc = Crc32(crc, poly, (byte)(val >> 0)); + crc = Crc32(crc, poly, (byte)(val >> 8)); + + return crc; + } + + private 
static uint Crc32w(uint crc, uint poly, uint val) + { + crc = Crc32(crc, poly, (byte)(val >> 0)); + crc = Crc32(crc, poly, (byte)(val >> 8)); + crc = Crc32(crc, poly, (byte)(val >> 16)); + crc = Crc32(crc, poly, (byte)(val >> 24)); + + return crc; + } + + private static uint Crc32x(uint crc, uint poly, ulong val) + { + crc = Crc32(crc, poly, (byte)(val >> 0)); + crc = Crc32(crc, poly, (byte)(val >> 8)); + crc = Crc32(crc, poly, (byte)(val >> 16)); + crc = Crc32(crc, poly, (byte)(val >> 24)); + crc = Crc32(crc, poly, (byte)(val >> 32)); + crc = Crc32(crc, poly, (byte)(val >> 40)); + crc = Crc32(crc, poly, (byte)(val >> 48)); + crc = Crc32(crc, poly, (byte)(val >> 56)); + + return crc; + } + + private static uint Crc32(uint crc, uint poly, byte val) + { + crc ^= val; + + for (int bit = 7; bit >= 0; bit--) + { + uint mask = (uint)(-(int)(crc & 1)); + + crc = (crc >> 1) ^ (poly & mask); + } + + return crc; + } +#endregion + +#region "Aes" + public static V128 Decrypt(V128 value, V128 roundKey) + { + return CryptoHelper.AesInvSubBytes(CryptoHelper.AesInvShiftRows(value ^ roundKey)); + } + + public static V128 Encrypt(V128 value, V128 roundKey) + { + return CryptoHelper.AesSubBytes(CryptoHelper.AesShiftRows(value ^ roundKey)); + } + + public static V128 InverseMixColumns(V128 value) + { + return CryptoHelper.AesInvMixColumns(value); + } + + public static V128 MixColumns(V128 value) + { + return CryptoHelper.AesMixColumns(value); + } +#endregion + +#region "Sha1" + public static V128 HashChoose(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaChoose(hash_abcd.Extract<uint>(1), + hash_abcd.Extract<uint>(2), + hash_abcd.Extract<uint>(3)); + + hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e); + + t = Rol(hash_abcd.Extract<uint>(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static uint FixedRotate(uint hash_e) + { + return hash_e.Rol(30); + } + + public static V128 HashMajority(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaMajority(hash_abcd.Extract<uint>(1), + hash_abcd.Extract<uint>(2), + hash_abcd.Extract<uint>(3)); + + hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e); + + t = Rol(hash_abcd.Extract<uint>(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static V128 HashParity(V128 hash_abcd, uint hash_e, V128 wk) + { + for (int e = 0; e <= 3; e++) + { + uint t = ShaParity(hash_abcd.Extract<uint>(1), + hash_abcd.Extract<uint>(2), + hash_abcd.Extract<uint>(3)); + + hash_e += Rol(hash_abcd.Extract<uint>(0), 5) + t + wk.Extract<uint>(e); + + t = Rol(hash_abcd.Extract<uint>(1), 30); + + hash_abcd.Insert(1, t); + + Rol32_160(ref hash_e, ref hash_abcd); + } + + return hash_abcd; + } + + public static V128 Sha1SchedulePart1(V128 w0_3, V128 w4_7, V128 w8_11) + { + ulong t2 = w4_7.Extract<ulong>(0); + ulong t1 = w0_3.Extract<ulong>(1); + + V128 result = new V128(t1, t2); + + return result ^ (w0_3 ^ w8_11); + } + + public static V128 Sha1SchedulePart2(V128 tw0_3, V128 w12_15) + { + V128 t = tw0_3 ^ (w12_15 >> 32); + + uint tE0 = t.Extract<uint>(0); + uint tE1 = t.Extract<uint>(1); + uint tE2 = t.Extract<uint>(2); + uint tE3 = t.Extract<uint>(3); + + return new V128(tE0.Rol(1), tE1.Rol(1), tE2.Rol(1), tE3.Rol(1) ^ tE0.Rol(2)); + } + + private static void Rol32_160(ref uint y, ref V128 x) + { + uint xE3 = x.Extract<uint>(3); + + x <<= 32; + 
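// Rotate the 160-bit state <y, x> one 32-bit lane: y becomes x's new low word, and x's old top word (saved in xE3 above) becomes the new y. +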
x.Insert(0, y); + + y = xE3; + } + + private static uint ShaChoose(uint x, uint y, uint z) + { + return ((y ^ z) & x) ^ z; + } + + private static uint ShaMajority(uint x, uint y, uint z) + { + return (x & y) | ((x | y) & z); + } + + private static uint ShaParity(uint x, uint y, uint z) + { + return x ^ y ^ z; + } + + private static uint Rol(this uint value, int count) + { + return (value << count) | (value >> (32 - count)); + } +#endregion + +#region "Sha256" + public static V128 HashLower(V128 hash_abcd, V128 hash_efgh, V128 wk) + { + return Sha256Hash(hash_abcd, hash_efgh, wk, part1: true); + } + + public static V128 HashUpper(V128 hash_abcd, V128 hash_efgh, V128 wk) + { + return Sha256Hash(hash_abcd, hash_efgh, wk, part1: false); + } + + public static V128 Sha256SchedulePart1(V128 w0_3, V128 w4_7) + { + V128 result = new V128(); + + for (int e = 0; e <= 3; e++) + { + uint elt = (e <= 2 ? w0_3 : w4_7).Extract<uint>(e <= 2 ? e + 1 : 0); + + elt = elt.Ror(7) ^ elt.Ror(18) ^ elt.Lsr(3); + + elt += w0_3.Extract<uint>(e); + + result.Insert(e, elt); + } + + return result; + } + + public static V128 Sha256SchedulePart2(V128 w0_3, V128 w8_11, V128 w12_15) + { + V128 result = new V128(); + + ulong t1 = w12_15.Extract<ulong>(1); + + for (int e = 0; e <= 1; e++) + { + uint elt = t1.ULongPart(e); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += w0_3.Extract<uint>(e) + w8_11.Extract<uint>(e + 1); + + result.Insert(e, elt); + } + + t1 = result.Extract<ulong>(0); + + for (int e = 2; e <= 3; e++) + { + uint elt = t1.ULongPart(e - 2); + + elt = elt.Ror(17) ^ elt.Ror(19) ^ elt.Lsr(10); + + elt += w0_3.Extract<uint>(e) + (e == 2 ? w8_11 : w12_15).Extract<uint>(e == 2 ? 3 : 0); + + result.Insert(e, elt); + } + + return result; + } + + private static V128 Sha256Hash(V128 x, V128 y, V128 w, bool part1) + { + for (int e = 0; e <= 3; e++) + { + uint chs = ShaChoose(y.Extract<uint>(0), + y.Extract<uint>(1), + y.Extract<uint>(2)); + + uint maj = ShaMajority(x.Extract<uint>(0), + x.Extract<uint>(1), + x.Extract<uint>(2)); + + uint t1 = y.Extract<uint>(3) + ShaHashSigma1(y.Extract<uint>(0)) + chs + w.Extract<uint>(e); + + uint t2 = t1 + x.Extract<uint>(3); + + x.Insert(3, t2); + + t2 = t1 + ShaHashSigma0(x.Extract<uint>(0)) + maj; + + y.Insert(3, t2); + + Rol32_256(ref y, ref x); + } + + return part1 ? x : y; + } + + private static void Rol32_256(ref V128 y, ref V128 x) + { + uint yE3 = y.Extract<uint>(3); + uint xE3 = x.Extract<uint>(3); + + y <<= 32; + x <<= 32; + + y.Insert(0, xE3); + x.Insert(0, yE3); + } + + private static uint ShaHashSigma0(uint x) + { + return x.Ror(2) ^ x.Ror(13) ^ x.Ror(22); + } + + private static uint ShaHashSigma1(uint x) + { + return x.Ror(6) ^ x.Ror(11) ^ x.Ror(25); + } + + private static uint Ror(this uint value, int count) + { + return (value >> count) | (value << (32 - count)); + } + + private static uint Lsr(this uint value, int count) + { + return value >> count; + } + + private static uint ULongPart(this ulong value, int part) + { + return part == 0 + ? 
(uint)(value & 0xFFFFFFFFUL) + : (uint)(value >> 32); + } +#endregion + + public static V128 PolynomialMult64_128(ulong op1, ulong op2) + { + V128 result = V128.Zero; + + V128 op2_128 = new V128(op2, 0); + + for (int i = 0; i < 64; i++) + { + if (((op1 >> i) & 1) == 1) + { + result ^= op2_128 << i; + } + } + + return result; + } + } +} diff --git a/src/ARMeilleure/Instructions/SoftFloat.cs b/src/ARMeilleure/Instructions/SoftFloat.cs new file mode 100644 index 00000000..9e3db68d --- /dev/null +++ b/src/ARMeilleure/Instructions/SoftFloat.cs @@ -0,0 +1,3480 @@ +using ARMeilleure.State; +using System; +using System.Diagnostics; + +namespace ARMeilleure.Instructions +{ + static class SoftFloat + { + static SoftFloat() + { + RecipEstimateTable = BuildRecipEstimateTable(); + RecipSqrtEstimateTable = BuildRecipSqrtEstimateTable(); + } + + public static readonly byte[] RecipEstimateTable; + public static readonly byte[] RecipSqrtEstimateTable; + + private static byte[] BuildRecipEstimateTable() + { + byte[] tbl = new byte[256]; + + for (int idx = 0; idx < 256; idx++) + { + uint src = (uint)idx + 256u; + + Debug.Assert(256u <= src && src < 512u); + + src = (src << 1) + 1u; + + uint aux = (1u << 19) / src; + + uint dst = (aux + 1u) >> 1; + + Debug.Assert(256u <= dst && dst < 512u); + + tbl[idx] = (byte)(dst - 256u); + } + + return tbl; + } + + private static byte[] BuildRecipSqrtEstimateTable() + { + byte[] tbl = new byte[384]; + + for (int idx = 0; idx < 384; idx++) + { + uint src = (uint)idx + 128u; + + Debug.Assert(128u <= src && src < 512u); + + if (src < 256u) + { + src = (src << 1) + 1u; + } + else + { + src = (src >> 1) << 1; + src = (src + 1u) << 1; + } + + uint aux = 512u; + + while (src * (aux + 1u) * (aux + 1u) < (1u << 28)) + { + aux = aux + 1u; + } + + uint dst = (aux + 1u) >> 1; + + Debug.Assert(256u <= dst && dst < 512u); + + tbl[idx] = (byte)(dst - 256u); + } + + return tbl; + } + + public static void FPProcessException(FPException exc, ExecutionContext context) + { + FPProcessException(exc, context, context.Fpcr); + } + + public static void FPProcessException(FPException exc, ExecutionContext context, FPCR fpcr) + { + int enable = (int)exc + 8; + + if ((fpcr & (FPCR)(1 << enable)) != 0) + { + throw new NotImplementedException("Floating-point trap handling."); + } + else + { + context.Fpsr |= (FPSR)(1 << (int)exc); + } + } + + public static FPRoundingMode GetRoundingMode(this FPCR fpcr) + { + const int RModeShift = 22; + + return (FPRoundingMode)(((uint)fpcr >> RModeShift) & 3u); + } + } + + static class SoftFloat16 + { + public static ushort FPDefaultNaN() + { + return (ushort)0x7E00u; + } + + public static ushort FPInfinity(bool sign) + { + return sign ? (ushort)0xFC00u : (ushort)0x7C00u; + } + + public static ushort FPZero(bool sign) + { + return sign ? (ushort)0x8000u : (ushort)0x0000u; + } + + public static ushort FPMaxNormal(bool sign) + { + return sign ? (ushort)0xFBFFu : (ushort)0x7BFFu; + } + + public static double FPUnpackCv( + this ushort valueBits, + out FPType type, + out bool sign, + ExecutionContext context) + { + sign = (~(uint)valueBits & 0x8000u) == 0u; + + uint exp16 = ((uint)valueBits & 0x7C00u) >> 10; + uint frac16 = (uint)valueBits & 0x03FFu; + + double real; + + if (exp16 == 0u) + { + if (frac16 == 0u) + { + type = FPType.Zero; + real = 0d; + } + else + { + type = FPType.Nonzero; // Subnormal. 
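+                    // A half-precision subnormal decodes as frac16 * 2^-10 * 2^-14,
+                    // so the smallest positive half is 2^-24.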
+ real = Math.Pow(2d, -14) * ((double)frac16 * Math.Pow(2d, -10)); + } + } + else if (exp16 == 0x1Fu && (context.Fpcr & FPCR.Ahp) == 0) + { + if (frac16 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac16 & 0x0200u) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp16 - 15) * (1d + (double)frac16 * Math.Pow(2d, -10)); + } + + return sign ? -real : real; + } + + public static ushort FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -14; + + const int e = 5; + const int f = 10; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + SoftFloat.FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1u << f) + { + biasedExp = 1u; + } + + if (intMant == 1u << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + ushort resultBits; + + if ((context.Fpcr & FPCR.Ahp) == 0) + { + if (biasedExp >= (1u << e) - 1u) + { + resultBits = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + SoftFloat.FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu)); + } + } + else + { + if (biasedExp >= 1u << e) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + SoftFloat.FPProcessException(FPException.InvalidOp, context); + + error = 0d; + } + else + { + resultBits = (ushort)((sign ? 
1u : 0u) << 15 | (biasedExp & 0x1Fu) << 10 | (intMant & 0x03FFu)); + } + } + + if (error != 0d) + { + SoftFloat.FPProcessException(FPException.Inexact, context); + } + + return resultBits; + } + } + + static class SoftFloat16_32 + { + public static float FPConvert(ushort valueBits) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if ((context.Fpcr & FPCR.Dn) != 0) + { + result = SoftFloat32.FPDefaultNaN(); + } + else + { + result = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + result = SoftFloat32.FPInfinity(sign); + } + else if (type == FPType.Zero) + { + result = SoftFloat32.FPZero(sign); + } + else + { + result = FPRoundCv(real, context); + } + + return result; + } + + private static float FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -126; + + const int e = 8; + const int f = 23; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) + { + context.Fpsr |= FPSR.Ufc; + + return SoftFloat32.FPZero(sign); + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + uint intMant = (uint)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + SoftFloat.FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1u << f) + { + biasedExp = 1u; + } + + if (intMant == 1u << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + float result; + + if (biasedExp >= (1u << e) - 1u) + { + result = overflowToInf ? SoftFloat32.FPInfinity(sign) : SoftFloat32.FPMaxNormal(sign); + + SoftFloat.FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 
1u : 0u) << 31 | (biasedExp & 0xFFu) << 23 | (intMant & 0x007FFFFFu))); + } + + if (error != 0d) + { + SoftFloat.FPProcessException(FPException.Inexact, context); + } + + return result; + } + + private static float FPConvertNaN(ushort valueBits) + { + return BitConverter.Int32BitsToSingle( + (int)(((uint)valueBits & 0x8000u) << 16 | 0x7FC00000u | ((uint)valueBits & 0x01FFu) << 13)); + } + } + + static class SoftFloat16_64 + { + public static double FPConvert(ushort valueBits) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = valueBits.FPUnpackCv(out FPType type, out bool sign, context); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if ((context.Fpcr & FPCR.Dn) != 0) + { + result = SoftFloat64.FPDefaultNaN(); + } + else + { + result = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + result = SoftFloat64.FPInfinity(sign); + } + else if (type == FPType.Zero) + { + result = SoftFloat64.FPZero(sign); + } + else + { + result = FPRoundCv(real, context); + } + + return result; + } + + private static double FPRoundCv(double real, ExecutionContext context) + { + const int minimumExp = -1022; + + const int e = 11; + const int f = 52; + + bool sign; + double mantissa; + + if (real < 0d) + { + sign = true; + mantissa = -real; + } + else + { + sign = false; + mantissa = real; + } + + int exponent = 0; + + while (mantissa < 1d) + { + mantissa *= 2d; + exponent--; + } + + while (mantissa >= 2d) + { + mantissa /= 2d; + exponent++; + } + + if ((context.Fpcr & FPCR.Fz) != 0 && exponent < minimumExp) + { + context.Fpsr |= FPSR.Ufc; + + return SoftFloat64.FPZero(sign); + } + + uint biasedExp = (uint)Math.Max(exponent - minimumExp + 1, 0); + + if (biasedExp == 0u) + { + mantissa /= Math.Pow(2d, minimumExp - exponent); + } + + ulong intMant = (ulong)Math.Floor(mantissa * Math.Pow(2d, f)); + double error = mantissa * Math.Pow(2d, f) - (double)intMant; + + if (biasedExp == 0u && (error != 0d || (context.Fpcr & FPCR.Ufe) != 0)) + { + SoftFloat.FPProcessException(FPException.Underflow, context); + } + + bool overflowToInf; + bool roundUp; + + switch (context.Fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: + roundUp = (error > 0.5d || (error == 0.5d && (intMant & 1u) == 1u)); + overflowToInf = true; + break; + + case FPRoundingMode.TowardsPlusInfinity: + roundUp = (error != 0d && !sign); + overflowToInf = !sign; + break; + + case FPRoundingMode.TowardsMinusInfinity: + roundUp = (error != 0d && sign); + overflowToInf = sign; + break; + + case FPRoundingMode.TowardsZero: + roundUp = false; + overflowToInf = false; + break; + } + + if (roundUp) + { + intMant++; + + if (intMant == 1ul << f) + { + biasedExp = 1u; + } + + if (intMant == 1ul << (f + 1)) + { + biasedExp++; + intMant >>= 1; + } + } + + double result; + + if (biasedExp >= (1u << e) - 1u) + { + result = overflowToInf ? SoftFloat64.FPInfinity(sign) : SoftFloat64.FPMaxNormal(sign); + + SoftFloat.FPProcessException(FPException.Overflow, context); + + error = 1d; + } + else + { + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 
1ul : 0ul) << 63 | (biasedExp & 0x7FFul) << 52 | (intMant & 0x000FFFFFFFFFFFFFul))); + } + + if (error != 0d) + { + SoftFloat.FPProcessException(FPException.Inexact, context); + } + + return result; + } + + private static double FPConvertNaN(ushort valueBits) + { + return BitConverter.Int64BitsToDouble( + (long)(((ulong)valueBits & 0x8000ul) << 48 | 0x7FF8000000000000ul | ((ulong)valueBits & 0x01FFul) << 42)); + } + } + + static class SoftFloat32_16 + { + public static ushort FPConvert(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = value.FPUnpackCv(out FPType type, out bool sign, out uint valueBits, context); + + bool altHp = (context.Fpcr & FPCR.Ahp) != 0; + + ushort resultBits; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if (altHp) + { + resultBits = SoftFloat16.FPZero(sign); + } + else if ((context.Fpcr & FPCR.Dn) != 0) + { + resultBits = SoftFloat16.FPDefaultNaN(); + } + else + { + resultBits = FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN || altHp) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + if (altHp) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + else + { + resultBits = SoftFloat16.FPInfinity(sign); + } + } + else if (type == FPType.Zero) + { + resultBits = SoftFloat16.FPZero(sign); + } + else + { + resultBits = SoftFloat16.FPRoundCv(real, context); + } + + return resultBits; + } + + private static double FPUnpackCv( + this float value, + out FPType type, + out bool sign, + out uint valueBits, + ExecutionContext context) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + uint exp32 = (valueBits & 0x7F800000u) >> 23; + uint frac32 = valueBits & 0x007FFFFFu; + + double real; + + if (exp32 == 0u) + { + if (frac32 == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + real = 0d; + + if (frac32 != 0u) + { + SoftFloat.FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; // Subnormal. + real = Math.Pow(2d, -126) * ((double)frac32 * Math.Pow(2d, -23)); + } + } + else if (exp32 == 0xFFu) + { + if (frac32 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000); + } + else + { + type = (~frac32 & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp32 - 127) * (1d + (double)frac32 * Math.Pow(2d, -23)); + } + + return sign ? -real : real; + } + + private static ushort FPConvertNaN(uint valueBits) + { + return (ushort)((valueBits & 0x80000000u) >> 16 | 0x7E00u | (valueBits & 0x003FE000u) >> 13); + } + } + + static class SoftFloat32 + { + public static float FPAdd(float value1, float value2) + { + return FPAddFpscr(value1, value2, false); + } + + public static float FPAddFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static int FPCompare(float value1, float value2, bool signalNaNs) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr); + + int result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = 0b0011; + + if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + + public static float FPCompareEQ(float value1, float value2) + { + return FPCompareEQFpscr(value1, value2, false); + } + + public static float FPCompareEQFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + if (type1 == FPType.SNaN || type2 == FPType.SNaN) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + } + else + { + result = ZerosOrOnes(value1 == value2); + } + + return result; + } + + public static float FPCompareGE(float value1, float value2) + { + return FPCompareGEFpscr(value1, value2, false); + } + + public static float FPCompareGEFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
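+ // Ordered comparisons (>=, >) treat any NaN operand, quiet or signaling, as false (all-zeros) and always raise InvalidOp; equality above only raises it for signaling NaNs.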
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else + { + result = ZerosOrOnes(value1 >= value2); + } + + return result; + } + + public static float FPCompareGT(float value1, float value2) + { + return FPCompareGTFpscr(value1, value2, false); + } + + public static float FPCompareGTFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + float result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else + { + result = ZerosOrOnes(value1 > value2); + } + + return result; + } + + public static float FPCompareLE(float value1, float value2) + { + return FPCompareGE(value2, value1); + } + + public static float FPCompareLT(float value1, float value2) + { + return FPCompareGT(value2, value1); + } + + public static float FPCompareLEFpscr(float value1, float value2, bool standardFpscr) + { + return FPCompareGEFpscr(value2, value1, standardFpscr); + } + + public static float FPCompareLTFpscr(float value1, float value2, bool standardFpscr) + { + return FPCompareGTFpscr(value2, value1, standardFpscr); + } + + public static float FPDiv(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) + { + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); + } + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMax(float value1, float value2) + { + return FPMaxFpscr(value1, value2, false); + } + + public static float FPMaxFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
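+ // Max semantics: a zero result takes sign1 && sign2, so +0 wins over -0; subnormal results are flushed to zero when FPCR.FZ is set.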
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + } + + return result; + } + + public static float FPMaxNum(float value1, float value2) + { + return FPMaxNumFpscr(value1, value2, false); + } + + public static float FPMaxNumFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMaxFpscr(value1, value2, standardFpscr); + } + + public static float FPMin(float value1, float value2) + { + return FPMinFpscr(value1, value2, false); + } + + public static float FPMinFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + } + + return result; + } + + public static float FPMinNum(float value1, float value2) + { + return FPMinNumFpscr(value1, value2, false); + } + + public static float FPMinNumFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
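+ // maxNum/minNum semantics: a quiet NaN on one side is first replaced by the "losing" infinity, so the numeric operand is selected by the underlying min/max.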
context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMinFpscr(value1, value2, standardFpscr); + } + + public static float FPMul(float value1, float value2) + { + return FPMulFpscr(value1, value2, false); + } + + public static float FPMulFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMulAdd(float valueA, float value1, float value2) + { + return FPMulAddFpscr(valueA, value1, value2, false); + } + + public static float FPMulAddFpscr(float valueA, float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
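+ // Fused multiply-add: NaNs are propagated across all three operands first; a quiet-NaN addend combined with an Inf * 0 product still forces the default NaN and InvalidOp below.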
context.StandardFpcrValue : context.Fpcr; + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out uint addend, context, fpcr); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr); + + if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + + if (!done) + { + bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP = zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = FPZero(signA); + } + else + { + result = MathF.FusedMultiplyAdd(value1, value2, valueA); + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPMulSub(float valueA, float value1, float value2) + { + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static float FPMulSubFpscr(float valueA, float value1, float value2, bool standardFpscr) + { + value1 = value1.FPNeg(); + + return FPMulAddFpscr(valueA, value1, value2, standardFpscr); + } + + public static float FPMulX(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPNegMulAdd(float valueA, float value1, float value2) + { + valueA = valueA.FPNeg(); + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static float FPNegMulSub(float valueA, float value1, float value2) + { + valueA = valueA.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static float FPRecipEstimate(float value) + { + return FPRecipEstimateFpscr(value, false); + } + + public static float FPRecipEstimateFpscr(float value, bool standardFpscr) + { + ExecutionContext context = 
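+ // Table-driven reciprocal estimate; this appears to follow the Arm ARM FPRecipEstimate pseudocode, indexing RecipEstimateTable with an 8-bit slice of the normalized fraction.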
NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context, fpcr); + } + else if (type == FPType.Infinity) + { + result = FPZero(sign); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); + } + else if (MathF.Abs(value) < MathF.Pow(2f, -128)) + { + bool overflowToInf; + + switch (fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: overflowToInf = true; break; + case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break; + case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break; + case FPRoundingMode.TowardsZero: overflowToInf = false; break; + } + + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + SoftFloat.FPProcessException(FPException.Overflow, context, fpcr); + SoftFloat.FPProcessException(FPException.Inexact, context, fpcr); + } + else if ((fpcr & FPCR.Fz) != 0 && (MathF.Abs(value) >= MathF.Pow(2f, 126))) + { + result = FPZero(sign); + + context.Fpsr |= FPSR.Ufc; + } + else + { + ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; + uint exp = (op & 0x7F800000u) >> 23; + + if (exp == 0u) + { + if ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2; + exp -= 1u; + } + else + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + } + + uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + + uint resultExp = 253u - exp; + + uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; + + fraction = (ulong)(estimate & 0xFFu) << 44; + + if (resultExp == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; + } + else if (resultExp + 1u == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; + resultExp = 0u; + } + + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 
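+ // Pack the estimate: sign in bit 31, 8-bit exponent in bits 30:23, top 23 bits of the estimated fraction below.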
1u : 0u) << 31 | (resultExp & 0xFFu) << 23 | (uint)(fraction >> 29) & 0x007FFFFFu)); + } + + return result; + } + + public static float FPRecipStep(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPSubFpscr(FPTwo(false), product, true); + } + + return result; + } + + public static float FPRecipStepFused(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + result = MathF.FusedMultiplyAdd(value1, value2, 2f); + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRecpX(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context, fpcr); + } + else + { + uint notExp = (~op >> 23) & 0xFFu; + uint maxExp = 0xFEu; + + result = BitConverter.Int32BitsToSingle( + (int)((sign ? 1u : 0u) << 31 | (notExp == 0xFFu ? maxExp : notExp) << 23)); + } + + return result; + } + + public static float FPRSqrtEstimate(float value) + { + return FPRSqrtEstimateFpscr(value, false); + } + + public static float FPRSqrtEstimateFpscr(float value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
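+ // Reciprocal square-root estimate: zero inputs give a signed infinity with DivideByZero; any other negative input gives the default NaN with InvalidOp.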
context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context, fpcr); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); + } + else if (sign) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if (type == FPType.Infinity) + { + result = FPZero(false); + } + else + { + ulong fraction = (ulong)(op & 0x007FFFFFu) << 29; + uint exp = (op & 0x7F800000u) >> 23; + + if (exp == 0u) + { + while ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + exp -= 1u; + } + + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + + uint scaled; + + if ((exp & 1u) == 0u) + { + scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + } + else + { + scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); + } + + uint resultExp = (380u - exp) >> 1; + + uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; + + result = BitConverter.Int32BitsToSingle((int)((resultExp & 0xFFu) << 23 | (estimate & 0xFFu) << 15)); + } + + return result; + } + + public static float FPHalvedSub(float value1, float value2, ExecutionContext context, FPCR fpcr) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = (value1 - value2) / 2.0f; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPRSqrtStep(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + float product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPHalvedSub(FPThree(false), product, context, fpcr); + } + + return result; + } + + public static float FPRSqrtStepFused(float value1, float value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = 
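+ // Fused step: value1 is negated below, so the FMA computes (3 - value1 * value2) / 2 with a single rounding; Inf * 0 inputs yield exactly 1.5.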
context.Fpcr; + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + result = MathF.FusedMultiplyAdd(value1, value2, 3f) / 2f; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPSqrt(float value) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value = value.FPUnpack(out FPType type, out bool sign, out uint op, context, fpcr); + + float result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context, fpcr); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else if (type == FPType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else + { + result = MathF.Sqrt(value); + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + + return result; + } + + public static float FPSub(float value1, float value2) + { + return FPSubFpscr(value1, value2, false); + } + + public static float FPSubFpscr(float value1, float value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out uint op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out uint op2, context, fpcr); + + float result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + + if ((fpcr & FPCR.Fz) != 0 && float.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0f); + } + } + } + + return result; + } + + public static float FPDefaultNaN() + { + return BitConverter.Int32BitsToSingle(0x7fc00000); + } + + public static float FPInfinity(bool sign) + { + return sign ? float.NegativeInfinity : float.PositiveInfinity; + } + + public static float FPZero(bool sign) + { + return sign ? -0f : +0f; + } + + public static float FPMaxNormal(bool sign) + { + return sign ? float.MinValue : float.MaxValue; + } + + private static float FPTwo(bool sign) + { + return sign ? 
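+ // Signed small constants (2, 3, 1.5) used by FPMulX and the reciprocal/rsqrt step helpers.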
-2f : +2f; + } + + private static float FPThree(bool sign) + { + return sign ? -3f : +3f; + } + + private static float FPOnePointFive(bool sign) + { + return sign ? -1.5f : +1.5f; + } + + private static float FPNeg(this float value) + { + return -value; + } + + private static float ZerosOrOnes(bool ones) + { + return BitConverter.Int32BitsToSingle(ones ? -1 : 0); + } + + private static float FPUnpack( + this float value, + out FPType type, + out bool sign, + out uint valueBits, + ExecutionContext context, + FPCR fpcr) + { + valueBits = (uint)BitConverter.SingleToInt32Bits(value); + + sign = (~valueBits & 0x80000000u) == 0u; + + if ((valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u || (fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + value = FPZero(sign); + + if ((valueBits & 0x007FFFFFu) != 0u) + { + SoftFloat.FPProcessException(FPException.InputDenorm, context, fpcr); + } + } + else + { + type = FPType.Nonzero; + } + } + else if ((~valueBits & 0x7F800000u) == 0u) + { + if ((valueBits & 0x007FFFFFu) == 0u) + { + type = FPType.Infinity; + } + else + { + type = (~valueBits & 0x00400000u) == 0u ? FPType.QNaN : FPType.SNaN; + value = FPZero(sign); + } + } + else + { + type = FPType.Nonzero; + } + + return value; + } + + private static float FPProcessNaNs( + FPType type1, + FPType type2, + uint op1, + uint op2, + out bool done, + ExecutionContext context, + FPCR fpcr) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context, fpcr); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context, fpcr); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context, fpcr); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context, fpcr); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaNs3( + FPType type1, + FPType type2, + FPType type3, + uint op1, + uint op2, + uint op3, + out bool done, + ExecutionContext context, + FPCR fpcr) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context, fpcr); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context, fpcr); + } + else if (type3 == FPType.SNaN) + { + return FPProcessNaN(type3, op3, context, fpcr); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context, fpcr); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context, fpcr); + } + else if (type3 == FPType.QNaN) + { + return FPProcessNaN(type3, op3, context, fpcr); + } + + done = false; + + return FPZero(false); + } + + private static float FPProcessNaN(FPType type, uint op, ExecutionContext context, FPCR fpcr) + { + if (type == FPType.SNaN) + { + op |= 1u << 22; + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + + if ((fpcr & FPCR.Dn) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int32BitsToSingle((int)op); + } + } + + static class SoftFloat64_16 + { + public static ushort FPConvert(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + + double real = value.FPUnpackCv(out FPType type, out bool sign, out ulong valueBits, context); + + bool altHp = (context.Fpcr & FPCR.Ahp) != 0; + + ushort resultBits; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + if (altHp) + { + resultBits = SoftFloat16.FPZero(sign); + } + else if ((context.Fpcr & FPCR.Dn) != 0) + { + resultBits = SoftFloat16.FPDefaultNaN(); + } + else + { + resultBits = 
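+ // Narrow the NaN, carrying the top payload bits into the half-precision result.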
FPConvertNaN(valueBits); + } + + if (type == FPType.SNaN || altHp) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + } + else if (type == FPType.Infinity) + { + if (altHp) + { + resultBits = (ushort)((sign ? 1u : 0u) << 15 | 0x7FFFu); + + SoftFloat.FPProcessException(FPException.InvalidOp, context); + } + else + { + resultBits = SoftFloat16.FPInfinity(sign); + } + } + else if (type == FPType.Zero) + { + resultBits = SoftFloat16.FPZero(sign); + } + else + { + resultBits = SoftFloat16.FPRoundCv(real, context); + } + + return resultBits; + } + + private static double FPUnpackCv( + this double value, + out FPType type, + out bool sign, + out ulong valueBits, + ExecutionContext context) + { + valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); + + sign = (~valueBits & 0x8000000000000000ul) == 0u; + + ulong exp64 = (valueBits & 0x7FF0000000000000ul) >> 52; + ulong frac64 = valueBits & 0x000FFFFFFFFFFFFFul; + + double real; + + if (exp64 == 0u) + { + if (frac64 == 0u || (context.Fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + real = 0d; + + if (frac64 != 0u) + { + SoftFloat.FPProcessException(FPException.InputDenorm, context); + } + } + else + { + type = FPType.Nonzero; // Subnormal. + real = Math.Pow(2d, -1022) * ((double)frac64 * Math.Pow(2d, -52)); + } + } + else if (exp64 == 0x7FFul) + { + if (frac64 == 0u) + { + type = FPType.Infinity; + real = Math.Pow(2d, 1000000); + } + else + { + type = (~frac64 & 0x0008000000000000ul) == 0u ? FPType.QNaN : FPType.SNaN; + real = 0d; + } + } + else + { + type = FPType.Nonzero; // Normal. + real = Math.Pow(2d, (int)exp64 - 1023) * (1d + (double)frac64 * Math.Pow(2d, -52)); + } + + return sign ? -real : real; + } + + private static ushort FPConvertNaN(ulong valueBits) + { + return (ushort)((valueBits & 0x8000000000000000ul) >> 48 | 0x7E00u | (valueBits & 0x0007FC0000000000ul) >> 42); + } + } + + static class SoftFloat64 + { + public static double FPAdd(double value1, double value2) + { + return FPAddFpscr(value1, value2, false); + } + + public static double FPAddFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
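+ // The double-precision helpers below mirror the single-precision ones, operating on 64-bit patterns (11-bit exponent, 52-bit fraction).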
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == !sign2) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && !sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 + value2; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static int FPCompare(double value1, double value2, bool signalNaNs) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out _, context, fpcr); + + int result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = 0b0011; + + if (type1 == FPType.SNaN || type2 == FPType.SNaN || signalNaNs) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + } + else + { + if (value1 == value2) + { + result = 0b0110; + } + else if (value1 < value2) + { + result = 0b1000; + } + else + { + result = 0b0010; + } + } + + return result; + } + + public static double FPCompareEQ(double value1, double value2) + { + return FPCompareEQFpscr(value1, value2, false); + } + + public static double FPCompareEQFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + if (type1 == FPType.SNaN || type2 == FPType.SNaN) + { + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + } + else + { + result = ZerosOrOnes(value1 == value2); + } + + return result; + } + + public static double FPCompareGE(double value1, double value2) + { + return FPCompareGEFpscr(value1, value2, false); + } + + public static double FPCompareGEFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
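+ // As in the single-precision version: NaN operands make >= compare false (all-zeros) and always raise InvalidOp.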
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else + { + result = ZerosOrOnes(value1 >= value2); + } + + return result; + } + + public static double FPCompareGT(double value1, double value2) + { + return FPCompareGTFpscr(value1, value2, false); + } + + public static double FPCompareGTFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + double result; + + if (type1 == FPType.SNaN || type1 == FPType.QNaN || type2 == FPType.SNaN || type2 == FPType.QNaN) + { + result = ZerosOrOnes(false); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else + { + result = ZerosOrOnes(value1 > value2); + } + + return result; + } + + public static double FPCompareLE(double value1, double value2) + { + return FPCompareGE(value2, value1); + } + + public static double FPCompareLT(double value1, double value2) + { + return FPCompareGT(value2, value1); + } + + public static double FPCompareLEFpscr(double value1, double value2, bool standardFpscr) + { + return FPCompareGEFpscr(value2, value1, standardFpscr); + } + + public static double FPCompareLTFpscr(double value1, double value2, bool standardFpscr) + { + return FPCompareGTFpscr(value2, value1, standardFpscr); + } + + public static double FPDiv(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && inf2) || (zero1 && zero2)) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if (inf1 || zero2) + { + result = FPInfinity(sign1 ^ sign2); + + if (!inf1) + { + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); + } + } + else if (zero1 || inf2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 / value2; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMax(double value1, double value2) + { + return FPMaxFpscr(value1, value2, false); + } + + public static double FPMaxFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
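+ // Max: a zero result is negative only when both operands are negative (sign1 && sign2), preserving +0 > -0.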
context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + if (value1 > value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value1; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 && sign2); + } + else + { + result = value2; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + } + + return result; + } + + public static double FPMaxNum(double value1, double value2) + { + return FPMaxNumFpscr(value1, value2, false); + } + + public static double FPMaxNumFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(true); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(true); + } + + return FPMaxFpscr(value1, value2, standardFpscr); + } + + public static double FPMin(double value1, double value2) + { + return FPMinFpscr(value1, value2, false); + } + + public static double FPMinFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + if (value1 < value2) + { + if (type1 == FPType.Infinity) + { + result = FPInfinity(sign1); + } + else if (type1 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value1; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + else + { + if (type2 == FPType.Infinity) + { + result = FPInfinity(sign2); + } + else if (type2 == FPType.Zero) + { + result = FPZero(sign1 || sign2); + } + else + { + result = value2; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + } + + return result; + } + + public static double FPMinNum(double value1, double value2) + { + return FPMinNumFpscr(value1, value2, false); + } + + public static double FPMinNumFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
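+ // minNum: a quiet NaN operand is replaced with +infinity so the numeric operand wins in FPMin below.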
context.StandardFpcrValue : context.Fpcr; + + value1.FPUnpack(out FPType type1, out _, out _, context, fpcr); + value2.FPUnpack(out FPType type2, out _, out _, context, fpcr); + + if (type1 == FPType.QNaN && type2 != FPType.QNaN) + { + value1 = FPInfinity(false); + } + else if (type1 != FPType.QNaN && type2 == FPType.QNaN) + { + value2 = FPInfinity(false); + } + + return FPMinFpscr(value1, value2, standardFpscr); + } + + public static double FPMul(double value1, double value2) + { + return FPMulFpscr(value1, value2, false); + } + + public static double FPMulFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMulAdd(double valueA, double value1, double value2) + { + return FPMulAddFpscr(valueA, value1, value2, false); + } + + public static double FPMulAddFpscr(double valueA, double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
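+ // FMA special case below: a quiet-NaN addend with an Inf * 0 product produces the default NaN and InvalidOp even though NaN propagation already returned a result.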
context.StandardFpcrValue : context.Fpcr; + + valueA = valueA.FPUnpack(out FPType typeA, out bool signA, out ulong addend, context, fpcr); + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double result = FPProcessNaNs3(typeA, type1, type2, addend, op1, op2, out bool done, context, fpcr); + + if (typeA == FPType.QNaN && ((inf1 && zero2) || (zero1 && inf2))) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + + if (!done) + { + bool infA = typeA == FPType.Infinity; bool zeroA = typeA == FPType.Zero; + + bool signP = sign1 ^ sign2; + bool infP = inf1 || inf2; + bool zeroP = zero1 || zero2; + + if ((inf1 && zero2) || (zero1 && inf2) || (infA && infP && signA != signP)) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((infA && !signA) || (infP && !signP)) + { + result = FPInfinity(false); + } + else if ((infA && signA) || (infP && signP)) + { + result = FPInfinity(true); + } + else if (zeroA && zeroP && signA == signP) + { + result = FPZero(signA); + } + else + { + result = Math.FusedMultiplyAdd(value1, value2, valueA); + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPMulSub(double valueA, double value1, double value2) + { + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static double FPMulSubFpscr(double valueA, double value1, double value2, bool standardFpscr) + { + value1 = value1.FPNeg(); + + return FPMulAddFpscr(valueA, value1, value2, standardFpscr); + } + + public static double FPMulX(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(sign1 ^ sign2); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else if (zero1 || zero2) + { + result = FPZero(sign1 ^ sign2); + } + else + { + result = value1 * value2; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPNegMulAdd(double valueA, double value1, double value2) + { + valueA = valueA.FPNeg(); + value1 = value1.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static double FPNegMulSub(double valueA, double value1, double value2) + { + valueA = valueA.FPNeg(); + + return FPMulAdd(valueA, value1, value2); + } + + public static double FPRecipEstimate(double value) + { + return FPRecipEstimateFpscr(value, false); + } + + public static double FPRecipEstimateFpscr(double value, bool standardFpscr) + { + 
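+ // Double-precision reciprocal estimate; same RecipEstimateTable lookup as the float version, with thresholds scaled for the 11-bit exponent.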
ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context, fpcr); + } + else if (type == FPType.Infinity) + { + result = FPZero(sign); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); + } + else if (Math.Abs(value) < Math.Pow(2d, -1024)) + { + bool overflowToInf; + + switch (fpcr.GetRoundingMode()) + { + default: + case FPRoundingMode.ToNearest: overflowToInf = true; break; + case FPRoundingMode.TowardsPlusInfinity: overflowToInf = !sign; break; + case FPRoundingMode.TowardsMinusInfinity: overflowToInf = sign; break; + case FPRoundingMode.TowardsZero: overflowToInf = false; break; + } + + result = overflowToInf ? FPInfinity(sign) : FPMaxNormal(sign); + + SoftFloat.FPProcessException(FPException.Overflow, context, fpcr); + SoftFloat.FPProcessException(FPException.Inexact, context, fpcr); + } + else if ((fpcr & FPCR.Fz) != 0 && (Math.Abs(value) >= Math.Pow(2d, 1022))) + { + result = FPZero(sign); + + context.Fpsr |= FPSR.Ufc; + } + else + { + ulong fraction = op & 0x000FFFFFFFFFFFFFul; + uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); + + if (exp == 0u) + { + if ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0003FFFFFFFFFFFFul) << 2; + exp -= 1u; + } + else + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + } + + uint scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + + uint resultExp = 2045u - exp; + + uint estimate = (uint)SoftFloat.RecipEstimateTable[scaled - 256u] + 256u; + + fraction = (ulong)(estimate & 0xFFu) << 44; + + if (resultExp == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFEul) | 0x0010000000000000ul) >> 1; + } + else if (resultExp + 1u == 0u) + { + fraction = ((fraction & 0x000FFFFFFFFFFFFCul) | 0x0010000000000000ul) >> 2; + resultExp = 0u; + } + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 
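+ // Pack the estimate: sign in bit 63, 11-bit exponent, 52-bit fraction.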
1ul : 0ul) << 63 | (resultExp & 0x7FFul) << 52 | (fraction & 0x000FFFFFFFFFFFFFul))); + } + + return result; + } + + public static double FPRecipStep(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPSubFpscr(FPTwo(false), product, true); + } + + return result; + } + + public static double FPRecipStepFused(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPTwo(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + result = Math.FusedMultiplyAdd(value1, value2, 2d); + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRecpX(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context, fpcr); + } + else + { + ulong notExp = (~op >> 52) & 0x7FFul; + ulong maxExp = 0x7FEul; + + result = BitConverter.Int64BitsToDouble( + (long)((sign ? 1ul : 0ul) << 63 | (notExp == 0x7FFul ? maxExp : notExp) << 52)); + } + + return result; + } + + public static double FPRSqrtEstimate(double value) + { + return FPRSqrtEstimateFpscr(value, false); + } + + public static double FPRSqrtEstimateFpscr(double value, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? 
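+ // rsqrt estimate: the exponent is roughly halved via (3068 - exp) >> 1, and the fraction comes from RecipSqrtEstimateTable, indexed differently for even and odd exponents.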
context.StandardFpcrValue : context.Fpcr; + + value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context, fpcr); + } + else if (type == FPType.Zero) + { + result = FPInfinity(sign); + + SoftFloat.FPProcessException(FPException.DivideByZero, context, fpcr); + } + else if (sign) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if (type == FPType.Infinity) + { + result = FPZero(false); + } + else + { + ulong fraction = op & 0x000FFFFFFFFFFFFFul; + uint exp = (uint)((op & 0x7FF0000000000000ul) >> 52); + + if (exp == 0u) + { + while ((fraction & 0x0008000000000000ul) == 0ul) + { + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + exp -= 1u; + } + + fraction = (fraction & 0x0007FFFFFFFFFFFFul) << 1; + } + + uint scaled; + + if ((exp & 1u) == 0u) + { + scaled = (uint)(((fraction & 0x000FF00000000000ul) | 0x0010000000000000ul) >> 44); + } + else + { + scaled = (uint)(((fraction & 0x000FE00000000000ul) | 0x0010000000000000ul) >> 45); + } + + uint resultExp = (3068u - exp) >> 1; + + uint estimate = (uint)SoftFloat.RecipSqrtEstimateTable[scaled - 128u] + 256u; + + result = BitConverter.Int64BitsToDouble((long)((resultExp & 0x7FFul) << 52 | (estimate & 0xFFul) << 44)); + } + + return result; + } + + public static double FPHalvedSub(double value1, double value2, ExecutionContext context, FPCR fpcr) + { + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = (value1 - value2) / 2.0; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPRSqrtStep(double value1, double value2) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.StandardFpcrValue; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + double product; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + product = FPZero(false); + } + else + { + product = FPMulFpscr(value1, value2, true); + } + + result = FPHalvedSub(FPThree(false), product, context, fpcr); + } + + return result; + } + + public static double FPRSqrtStepFused(double value1, double value2) + { + ExecutionContext context = 
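+ // Fused rsqrt step: value1 is negated below, so the FMA computes (3 - value1 * value2) / 2 with a single rounding.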
NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value1 = value1.FPNeg(); + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if ((inf1 && zero2) || (zero1 && inf2)) + { + result = FPOnePointFive(false); + } + else if (inf1 || inf2) + { + result = FPInfinity(sign1 ^ sign2); + } + else + { + result = Math.FusedMultiplyAdd(value1, value2, 3d) / 2d; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPSqrt(double value) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = context.Fpcr; + + value = value.FPUnpack(out FPType type, out bool sign, out ulong op, context, fpcr); + + double result; + + if (type == FPType.SNaN || type == FPType.QNaN) + { + result = FPProcessNaN(type, op, context, fpcr); + } + else if (type == FPType.Zero) + { + result = FPZero(sign); + } + else if (type == FPType.Infinity && !sign) + { + result = FPInfinity(sign); + } + else if (sign) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else + { + result = Math.Sqrt(value); + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + + return result; + } + + public static double FPSub(double value1, double value2) + { + return FPSubFpscr(value1, value2, false); + } + + public static double FPSubFpscr(double value1, double value2, bool standardFpscr) + { + ExecutionContext context = NativeInterface.GetContext(); + FPCR fpcr = standardFpscr ? context.StandardFpcrValue : context.Fpcr; + + value1 = value1.FPUnpack(out FPType type1, out bool sign1, out ulong op1, context, fpcr); + value2 = value2.FPUnpack(out FPType type2, out bool sign2, out ulong op2, context, fpcr); + + double result = FPProcessNaNs(type1, type2, op1, op2, out bool done, context, fpcr); + + if (!done) + { + bool inf1 = type1 == FPType.Infinity; bool zero1 = type1 == FPType.Zero; + bool inf2 = type2 == FPType.Infinity; bool zero2 = type2 == FPType.Zero; + + if (inf1 && inf2 && sign1 == sign2) + { + result = FPDefaultNaN(); + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + else if ((inf1 && !sign1) || (inf2 && sign2)) + { + result = FPInfinity(false); + } + else if ((inf1 && sign1) || (inf2 && !sign2)) + { + result = FPInfinity(true); + } + else if (zero1 && zero2 && sign1 == !sign2) + { + result = FPZero(sign1); + } + else + { + result = value1 - value2; + + if ((fpcr & FPCR.Fz) != 0 && double.IsSubnormal(result)) + { + context.Fpsr |= FPSR.Ufc; + + result = FPZero(result < 0d); + } + } + } + + return result; + } + + public static double FPDefaultNaN() + { + return BitConverter.Int64BitsToDouble(0x7ff8000000000000); + } + + public static double FPInfinity(bool sign) + { + return sign ? double.NegativeInfinity : double.PositiveInfinity; + } + + public static double FPZero(bool sign) + { + return sign ? -0d : +0d; + } + + public static double FPMaxNormal(bool sign) + { + return sign ? 
double.MinValue : double.MaxValue; + } + + private static double FPTwo(bool sign) + { + return sign ? -2d : +2d; + } + + private static double FPThree(bool sign) + { + return sign ? -3d : +3d; + } + + private static double FPOnePointFive(bool sign) + { + return sign ? -1.5d : +1.5d; + } + + private static double FPNeg(this double value) + { + return -value; + } + + private static double ZerosOrOnes(bool ones) + { + return BitConverter.Int64BitsToDouble(ones ? -1L : 0L); + } + + private static double FPUnpack( + this double value, + out FPType type, + out bool sign, + out ulong valueBits, + ExecutionContext context, + FPCR fpcr) + { + valueBits = (ulong)BitConverter.DoubleToInt64Bits(value); + + sign = (~valueBits & 0x8000000000000000ul) == 0ul; + + if ((valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul || (fpcr & FPCR.Fz) != 0) + { + type = FPType.Zero; + value = FPZero(sign); + + if ((valueBits & 0x000FFFFFFFFFFFFFul) != 0ul) + { + SoftFloat.FPProcessException(FPException.InputDenorm, context, fpcr); + } + } + else + { + type = FPType.Nonzero; + } + } + else if ((~valueBits & 0x7FF0000000000000ul) == 0ul) + { + if ((valueBits & 0x000FFFFFFFFFFFFFul) == 0ul) + { + type = FPType.Infinity; + } + else + { + type = (~valueBits & 0x0008000000000000ul) == 0ul ? FPType.QNaN : FPType.SNaN; + value = FPZero(sign); + } + } + else + { + type = FPType.Nonzero; + } + + return value; + } + + private static double FPProcessNaNs( + FPType type1, + FPType type2, + ulong op1, + ulong op2, + out bool done, + ExecutionContext context, + FPCR fpcr) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context, fpcr); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context, fpcr); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context, fpcr); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context, fpcr); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaNs3( + FPType type1, + FPType type2, + FPType type3, + ulong op1, + ulong op2, + ulong op3, + out bool done, + ExecutionContext context, + FPCR fpcr) + { + done = true; + + if (type1 == FPType.SNaN) + { + return FPProcessNaN(type1, op1, context, fpcr); + } + else if (type2 == FPType.SNaN) + { + return FPProcessNaN(type2, op2, context, fpcr); + } + else if (type3 == FPType.SNaN) + { + return FPProcessNaN(type3, op3, context, fpcr); + } + else if (type1 == FPType.QNaN) + { + return FPProcessNaN(type1, op1, context, fpcr); + } + else if (type2 == FPType.QNaN) + { + return FPProcessNaN(type2, op2, context, fpcr); + } + else if (type3 == FPType.QNaN) + { + return FPProcessNaN(type3, op3, context, fpcr); + } + + done = false; + + return FPZero(false); + } + + private static double FPProcessNaN(FPType type, ulong op, ExecutionContext context, FPCR fpcr) + { + if (type == FPType.SNaN) + { + op |= 1ul << 51; + + SoftFloat.FPProcessException(FPException.InvalidOp, context, fpcr); + } + + if ((fpcr & FPCR.Dn) != 0) + { + return FPDefaultNaN(); + } + + return BitConverter.Int64BitsToDouble((long)op); + } + } +} diff --git a/src/ARMeilleure/IntermediateRepresentation/BasicBlock.cs b/src/ARMeilleure/IntermediateRepresentation/BasicBlock.cs new file mode 100644 index 00000000..07bd8b67 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/BasicBlock.cs @@ -0,0 +1,159 @@ +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; + +namespace 
ARMeilleure.IntermediateRepresentation +{ + class BasicBlock : IEquatable<BasicBlock>, IIntrusiveListNode<BasicBlock> + { + private const uint MaxSuccessors = 2; + + private int _succCount; + private BasicBlock _succ0; + private BasicBlock _succ1; + private HashSet<BasicBlock> _domFrontiers; + + public int Index { get; set; } + public BasicBlockFrequency Frequency { get; set; } + public BasicBlock ListPrevious { get; set; } + public BasicBlock ListNext { get; set; } + public IntrusiveList<Operation> Operations { get; } + public List<BasicBlock> Predecessors { get; } + public BasicBlock ImmediateDominator { get; set; } + + public int SuccessorsCount => _succCount; + + public HashSet<BasicBlock> DominanceFrontiers + { + get + { + if (_domFrontiers == null) + { + _domFrontiers = new HashSet<BasicBlock>(); + } + + return _domFrontiers; + } + } + + public BasicBlock() : this(index: -1) { } + + public BasicBlock(int index) + { + Operations = new IntrusiveList<Operation>(); + Predecessors = new List<BasicBlock>(); + + Index = index; + } + + public void AddSuccessor(BasicBlock block) + { + ArgumentNullException.ThrowIfNull(block); + + if ((uint)_succCount + 1 > MaxSuccessors) + { + ThrowSuccessorOverflow(); + } + + block.Predecessors.Add(this); + + GetSuccessorUnsafe(_succCount++) = block; + } + + public void RemoveSuccessor(int index) + { + if ((uint)index >= (uint)_succCount) + { + ThrowOutOfRange(nameof(index)); + } + + ref BasicBlock oldBlock = ref GetSuccessorUnsafe(index); + + oldBlock.Predecessors.Remove(this); + oldBlock = null; + + if (index == 0) + { + _succ0 = _succ1; + } + + _succCount--; + } + + public BasicBlock GetSuccessor(int index) + { + if ((uint)index >= (uint)_succCount) + { + ThrowOutOfRange(nameof(index)); + } + + return GetSuccessorUnsafe(index); + } + + private ref BasicBlock GetSuccessorUnsafe(int index) + { + return ref Unsafe.Add(ref _succ0, index); + } + + public void SetSuccessor(int index, BasicBlock block) + { + ArgumentNullException.ThrowIfNull(block); + + if ((uint)index >= (uint)_succCount) + { + ThrowOutOfRange(nameof(index)); + } + + ref BasicBlock oldBlock = ref GetSuccessorUnsafe(index); + + oldBlock.Predecessors.Remove(this); + block.Predecessors.Add(this); + + oldBlock = block; + } + + public void Append(Operation node) + { + Operation last = Operations.Last; + + // Append node before terminal or to end if no terminal. + if (last == default) + { + Operations.AddLast(node); + + return; + } + + switch (last.Instruction) + { + case Instruction.Return: + case Instruction.Tailcall: + case Instruction.BranchIf: + Operations.AddBefore(last, node); + break; + + default: + Operations.AddLast(node); + break; + } + } + + private static void ThrowOutOfRange(string name) => throw new ArgumentOutOfRangeException(name); + private static void ThrowSuccessorOverflow() => throw new OverflowException($"BasicBlock can only have {MaxSuccessors} successors."); + + public bool Equals(BasicBlock other) + { + return other == this; + } + + public override bool Equals(object obj) + { + return Equals(obj as BasicBlock); + } + + public override int GetHashCode() + { + return base.GetHashCode(); + } + } +}
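A note on BasicBlock above: it caps a block at two successors and stores them in two plain fields, indexing them with `Unsafe.Add` over `_succ0` as if they were an inline array. A minimal self-contained sketch of the same trick (the `TwoSlots` type is hypothetical; like the real class, it assumes the runtime lays the two fields out consecutively):

```csharp
using System;
using System.Runtime.CompilerServices;

class TwoSlots
{
    // Two consecutive fields acting as an inline two-element "array",
    // mirroring BasicBlock's _succ0/_succ1, so no List<T> is allocated.
    private object _slot0;
    private object _slot1; // reached through Unsafe.Add, never by name
    private int _count;

    private ref object GetSlotUnsafe(int index)
    {
        // Only valid for index 0 or 1, and only because the two fields
        // sit next to each other; callers must bounds-check first.
        return ref Unsafe.Add(ref _slot0, index);
    }

    public void Add(object value)
    {
        if (_count == 2) throw new OverflowException("Only two slots available.");
        GetSlotUnsafe(_count++) = value;
    }

    public object Get(int index)
    {
        if ((uint)index >= (uint)_count) throw new ArgumentOutOfRangeException(nameof(index));
        return GetSlotUnsafe(index);
    }
}
```

Avoiding a per-block list matters here because a translated function can allocate thousands of blocks, and almost all of them have at most two successors (fall-through and branch target).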
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs b/src/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs new file mode 100644 index 00000000..96cfee35 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/BasicBlockFrequency.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum BasicBlockFrequency + { + Default, + Cold + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/Comparison.cs b/src/ARMeilleure/IntermediateRepresentation/Comparison.cs new file mode 100644 index 00000000..628ce105 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/Comparison.cs @@ -0,0 +1,24 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum Comparison + { + Equal = 0, + NotEqual = 1, + Greater = 2, + LessOrEqual = 3, + GreaterUI = 4, + LessOrEqualUI = 5, + GreaterOrEqual = 6, + Less = 7, + GreaterOrEqualUI = 8, + LessUI = 9 + } + + static class ComparisonExtensions + { + public static Comparison Invert(this Comparison comp) + { + return (Comparison)((int)comp ^ 1); + } + } +} diff --git a/src/ARMeilleure/IntermediateRepresentation/IIntrusiveListNode.cs b/src/ARMeilleure/IntermediateRepresentation/IIntrusiveListNode.cs new file mode 100644 index 00000000..caa9b83f --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/IIntrusiveListNode.cs @@ -0,0 +1,8 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + interface IIntrusiveListNode<T> + { + T ListPrevious { get; set; } + T ListNext { get; set; } + } +} diff --git a/src/ARMeilleure/IntermediateRepresentation/Instruction.cs b/src/ARMeilleure/IntermediateRepresentation/Instruction.cs new file mode 100644 index 00000000..b55fe1da --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/Instruction.cs @@ -0,0 +1,72 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum Instruction : ushort + { + Add, + BitwiseAnd, + BitwiseExclusiveOr, + BitwiseNot, + BitwiseOr, + BranchIf, + ByteSwap, + Call, + Compare, + CompareAndSwap, + CompareAndSwap16, + CompareAndSwap8, + ConditionalSelect, + ConvertI64ToI32, + ConvertToFP, + ConvertToFPUI, + Copy, + CountLeadingZeros, + Divide, + DivideUI, + Load, + Load16, + Load8, + LoadArgument, + MemoryBarrier, + Multiply, + Multiply64HighSI, + Multiply64HighUI, + Negate, + Return, + RotateRight, + ShiftLeft, + ShiftRightSI, + ShiftRightUI, + SignExtend16, + SignExtend32, + SignExtend8, + StackAlloc, + Store, + Store16, + Store8, + Subtract, + Tailcall, + VectorCreateScalar, + VectorExtract, + VectorExtract16, + VectorExtract8, + VectorInsert, + VectorInsert16, + VectorInsert8, + VectorOne, + VectorZero, + VectorZeroUpper64, + VectorZeroUpper96, + ZeroExtend16, + ZeroExtend32, + ZeroExtend8, + + Clobber, + Extended, + Fill, + LoadFromContext, + Phi, + Spill, + SpillArg, + StoreToContext + } +}
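The Comparison enum above is laid out so that every predicate sits next to its logical negation, which is what lets `Invert` be a single XOR of the low bit. A tiny self-contained check of that pairing (names shortened; the real enum is internal to ARMeilleure):

```csharp
using System.Diagnostics;

enum Cmp { Equal = 0, NotEqual = 1, Greater = 2, LessOrEqual = 3, GreaterOrEqual = 6, Less = 7 }

static class CmpDemo
{
    static Cmp Invert(Cmp c) => (Cmp)((int)c ^ 1); // flip the low bit

    static void Main()
    {
        Debug.Assert(Invert(Cmp.Equal) == Cmp.NotEqual);      // !(a == b)  is  a != b
        Debug.Assert(Invert(Cmp.Greater) == Cmp.LessOrEqual); // !(a > b)   is  a <= b
        Debug.Assert(Invert(Cmp.GreaterOrEqual) == Cmp.Less); // !(a >= b)  is  a < b
    }
}
```

The pairing only works because signed and unsigned variants are kept in separate even/odd pairs, so inverting never changes signedness.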
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/Intrinsic.cs b/src/ARMeilleure/IntermediateRepresentation/Intrinsic.cs new file mode 100644 index 00000000..f5a776fa --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/Intrinsic.cs @@ -0,0 +1,636 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum Intrinsic : ushort + { + // X86 (SSE and AVX) + + X86Addpd, + X86Addps, + X86Addsd, + X86Addss, + X86Aesdec, + X86Aesdeclast, + X86Aesenc, + X86Aesenclast, + X86Aesimc, + X86Andnpd, + X86Andnps, + X86Andpd, + X86Andps, + X86Blendvpd, + X86Blendvps, + X86Cmppd, + X86Cmpps, + X86Cmpsd, + X86Cmpss, + X86Comisdeq, + X86Comisdge, + X86Comisdlt, + X86Comisseq, + X86Comissge, + X86Comisslt, + X86Crc32, + X86Crc32_16, + X86Crc32_8, + X86Cvtdq2pd, + X86Cvtdq2ps, + X86Cvtpd2dq, + X86Cvtpd2ps, + X86Cvtps2dq, + X86Cvtps2pd, + X86Cvtsd2si, + X86Cvtsd2ss, + X86Cvtsi2sd, + X86Cvtsi2si, + X86Cvtsi2ss, + X86Cvtss2sd, + X86Cvtss2si, + X86Divpd, + X86Divps, + X86Divsd, + X86Divss, + X86Gf2p8affineqb, + X86Haddpd, + X86Haddps, + X86Insertps, + X86Ldmxcsr, + X86Maxpd, + X86Maxps, + X86Maxsd, + X86Maxss, + X86Minpd, + X86Minps, + X86Minsd, + X86Minss, + X86Movhlps, + X86Movlhps, + X86Movss, + X86Mulpd, + X86Mulps, + X86Mulsd, + X86Mulss, + X86Paddb, + X86Paddd, + X86Paddq, + X86Paddw, + X86Palignr, + X86Pand, + X86Pandn, + X86Pavgb, + X86Pavgw, + X86Pblendvb, + X86Pclmulqdq, + X86Pcmpeqb, + X86Pcmpeqd, + X86Pcmpeqq, + X86Pcmpeqw, + X86Pcmpgtb, + X86Pcmpgtd, + X86Pcmpgtq, + X86Pcmpgtw, + X86Pmaxsb, + X86Pmaxsd, + X86Pmaxsw, + X86Pmaxub, + X86Pmaxud, + X86Pmaxuw, + X86Pminsb, + X86Pminsd, + X86Pminsw, + X86Pminub, + X86Pminud, + X86Pminuw, + X86Pmovsxbw, + X86Pmovsxdq, + X86Pmovsxwd, + X86Pmovzxbw, + X86Pmovzxdq, + X86Pmovzxwd, + X86Pmulld, + X86Pmullw, + X86Popcnt, + X86Por, + X86Pshufb, + X86Pshufd, + X86Pslld, + X86Pslldq, + X86Psllq, + X86Psllw, + X86Psrad, + X86Psraw, + X86Psrld, + X86Psrlq, + X86Psrldq, + X86Psrlw, + X86Psubb, + X86Psubd, + X86Psubq, + X86Psubw, + X86Punpckhbw, + X86Punpckhdq, + X86Punpckhqdq, + X86Punpckhwd, + X86Punpcklbw, + X86Punpckldq, + X86Punpcklqdq, + X86Punpcklwd, + X86Pxor, + X86Rcpps, + X86Rcpss, + X86Roundpd, + X86Roundps, + X86Roundsd, + X86Roundss, + X86Rsqrtps, + X86Rsqrtss, + X86Sha256Msg1, + X86Sha256Msg2, + X86Sha256Rnds2, + X86Shufpd, + X86Shufps, + X86Sqrtpd, + X86Sqrtps, + X86Sqrtsd, + X86Sqrtss, + X86Stmxcsr, + X86Subpd, + X86Subps, + X86Subsd, + X86Subss, + X86Unpckhpd, + X86Unpckhps, + X86Unpcklpd, + X86Unpcklps, + X86Vcvtph2ps, + X86Vcvtps2ph, + X86Vfmadd231pd, + X86Vfmadd231ps, + X86Vfmadd231sd, + X86Vfmadd231ss, + X86Vfmsub231sd, + X86Vfmsub231ss, + X86Vfnmadd231pd, + X86Vfnmadd231ps, + X86Vfnmadd231sd, + X86Vfnmadd231ss, + X86Vfnmsub231sd, + X86Vfnmsub231ss, + X86Vpternlogd, + X86Xorpd, + X86Xorps, + + // Arm64 (FP and Advanced SIMD) + + Arm64AbsS, + Arm64AbsV, + Arm64AddhnV, + Arm64AddpS, + Arm64AddpV, + Arm64AddvV, + Arm64AddS, + Arm64AddV, + Arm64AesdV, + Arm64AeseV, + Arm64AesimcV, + Arm64AesmcV, + Arm64AndV, + Arm64BicVi, + Arm64BicV, + Arm64BifV, + Arm64BitV, + Arm64BslV, + Arm64ClsV, + Arm64ClzV, + Arm64CmeqS, + Arm64CmeqV, + Arm64CmeqSz, + Arm64CmeqVz, + Arm64CmgeS, + Arm64CmgeV, + Arm64CmgeSz, + Arm64CmgeVz, + Arm64CmgtS, + Arm64CmgtV, + Arm64CmgtSz, + Arm64CmgtVz, + Arm64CmhiS, + Arm64CmhiV, + Arm64CmhsS, + Arm64CmhsV, + Arm64CmleSz, + Arm64CmleVz, + Arm64CmltSz, + Arm64CmltVz, + Arm64CmtstS, + Arm64CmtstV, + Arm64CntV, + Arm64DupSe, + Arm64DupVe, + Arm64DupGp, + Arm64EorV, + Arm64ExtV, + Arm64FabdS, + 
Arm64FabdV, + Arm64FabsV, + Arm64FabsS, + Arm64FacgeS, + Arm64FacgeV, + Arm64FacgtS, + Arm64FacgtV, + Arm64FaddpS, + Arm64FaddpV, + Arm64FaddV, + Arm64FaddS, + Arm64FccmpeS, + Arm64FccmpS, + Arm64FcmeqS, + Arm64FcmeqV, + Arm64FcmeqSz, + Arm64FcmeqVz, + Arm64FcmgeS, + Arm64FcmgeV, + Arm64FcmgeSz, + Arm64FcmgeVz, + Arm64FcmgtS, + Arm64FcmgtV, + Arm64FcmgtSz, + Arm64FcmgtVz, + Arm64FcmleSz, + Arm64FcmleVz, + Arm64FcmltSz, + Arm64FcmltVz, + Arm64FcmpeS, + Arm64FcmpS, + Arm64FcselS, + Arm64FcvtasS, + Arm64FcvtasV, + Arm64FcvtasGp, + Arm64FcvtauS, + Arm64FcvtauV, + Arm64FcvtauGp, + Arm64FcvtlV, + Arm64FcvtmsS, + Arm64FcvtmsV, + Arm64FcvtmsGp, + Arm64FcvtmuS, + Arm64FcvtmuV, + Arm64FcvtmuGp, + Arm64FcvtnsS, + Arm64FcvtnsV, + Arm64FcvtnsGp, + Arm64FcvtnuS, + Arm64FcvtnuV, + Arm64FcvtnuGp, + Arm64FcvtnV, + Arm64FcvtpsS, + Arm64FcvtpsV, + Arm64FcvtpsGp, + Arm64FcvtpuS, + Arm64FcvtpuV, + Arm64FcvtpuGp, + Arm64FcvtxnS, + Arm64FcvtxnV, + Arm64FcvtzsSFixed, + Arm64FcvtzsVFixed, + Arm64FcvtzsS, + Arm64FcvtzsV, + Arm64FcvtzsGpFixed, + Arm64FcvtzsGp, + Arm64FcvtzuSFixed, + Arm64FcvtzuVFixed, + Arm64FcvtzuS, + Arm64FcvtzuV, + Arm64FcvtzuGpFixed, + Arm64FcvtzuGp, + Arm64FcvtS, + Arm64FdivV, + Arm64FdivS, + Arm64FmaddS, + Arm64FmaxnmpS, + Arm64FmaxnmpV, + Arm64FmaxnmvV, + Arm64FmaxnmV, + Arm64FmaxnmS, + Arm64FmaxpS, + Arm64FmaxpV, + Arm64FmaxvV, + Arm64FmaxV, + Arm64FmaxS, + Arm64FminnmpS, + Arm64FminnmpV, + Arm64FminnmvV, + Arm64FminnmV, + Arm64FminnmS, + Arm64FminpS, + Arm64FminpV, + Arm64FminvV, + Arm64FminV, + Arm64FminS, + Arm64FmlaSe, + Arm64FmlaVe, + Arm64FmlaV, + Arm64FmlsSe, + Arm64FmlsVe, + Arm64FmlsV, + Arm64FmovVi, + Arm64FmovS, + Arm64FmovGp, + Arm64FmovSi, + Arm64FmsubS, + Arm64FmulxSe, + Arm64FmulxVe, + Arm64FmulxS, + Arm64FmulxV, + Arm64FmulSe, + Arm64FmulVe, + Arm64FmulV, + Arm64FmulS, + Arm64FnegV, + Arm64FnegS, + Arm64FnmaddS, + Arm64FnmsubS, + Arm64FnmulS, + Arm64FrecpeS, + Arm64FrecpeV, + Arm64FrecpsS, + Arm64FrecpsV, + Arm64FrecpxS, + Arm64FrintaV, + Arm64FrintaS, + Arm64FrintiV, + Arm64FrintiS, + Arm64FrintmV, + Arm64FrintmS, + Arm64FrintnV, + Arm64FrintnS, + Arm64FrintpV, + Arm64FrintpS, + Arm64FrintxV, + Arm64FrintxS, + Arm64FrintzV, + Arm64FrintzS, + Arm64FrsqrteS, + Arm64FrsqrteV, + Arm64FrsqrtsS, + Arm64FrsqrtsV, + Arm64FsqrtV, + Arm64FsqrtS, + Arm64FsubV, + Arm64FsubS, + Arm64InsVe, + Arm64InsGp, + Arm64Ld1rV, + Arm64Ld1Vms, + Arm64Ld1Vss, + Arm64Ld2rV, + Arm64Ld2Vms, + Arm64Ld2Vss, + Arm64Ld3rV, + Arm64Ld3Vms, + Arm64Ld3Vss, + Arm64Ld4rV, + Arm64Ld4Vms, + Arm64Ld4Vss, + Arm64MlaVe, + Arm64MlaV, + Arm64MlsVe, + Arm64MlsV, + Arm64MoviV, + Arm64MrsFpcr, + Arm64MsrFpcr, + Arm64MrsFpsr, + Arm64MsrFpsr, + Arm64MulVe, + Arm64MulV, + Arm64MvniV, + Arm64NegS, + Arm64NegV, + Arm64NotV, + Arm64OrnV, + Arm64OrrVi, + Arm64OrrV, + Arm64PmullV, + Arm64PmulV, + Arm64RaddhnV, + Arm64RbitV, + Arm64Rev16V, + Arm64Rev32V, + Arm64Rev64V, + Arm64RshrnV, + Arm64RsubhnV, + Arm64SabalV, + Arm64SabaV, + Arm64SabdlV, + Arm64SabdV, + Arm64SadalpV, + Arm64SaddlpV, + Arm64SaddlvV, + Arm64SaddlV, + Arm64SaddwV, + Arm64ScvtfSFixed, + Arm64ScvtfVFixed, + Arm64ScvtfS, + Arm64ScvtfV, + Arm64ScvtfGpFixed, + Arm64ScvtfGp, + Arm64Sha1cV, + Arm64Sha1hV, + Arm64Sha1mV, + Arm64Sha1pV, + Arm64Sha1su0V, + Arm64Sha1su1V, + Arm64Sha256h2V, + Arm64Sha256hV, + Arm64Sha256su0V, + Arm64Sha256su1V, + Arm64ShaddV, + Arm64ShllV, + Arm64ShlS, + Arm64ShlV, + Arm64ShrnV, + Arm64ShsubV, + Arm64SliS, + Arm64SliV, + Arm64SmaxpV, + Arm64SmaxvV, + Arm64SmaxV, + Arm64SminpV, + Arm64SminvV, + Arm64SminV, + Arm64SmlalVe, + Arm64SmlalV, + 
Arm64SmlslVe, + Arm64SmlslV, + Arm64SmovV, + Arm64SmullVe, + Arm64SmullV, + Arm64SqabsS, + Arm64SqabsV, + Arm64SqaddS, + Arm64SqaddV, + Arm64SqdmlalSe, + Arm64SqdmlalVe, + Arm64SqdmlalS, + Arm64SqdmlalV, + Arm64SqdmlslSe, + Arm64SqdmlslVe, + Arm64SqdmlslS, + Arm64SqdmlslV, + Arm64SqdmulhSe, + Arm64SqdmulhVe, + Arm64SqdmulhS, + Arm64SqdmulhV, + Arm64SqdmullSe, + Arm64SqdmullVe, + Arm64SqdmullS, + Arm64SqdmullV, + Arm64SqnegS, + Arm64SqnegV, + Arm64SqrdmulhSe, + Arm64SqrdmulhVe, + Arm64SqrdmulhS, + Arm64SqrdmulhV, + Arm64SqrshlS, + Arm64SqrshlV, + Arm64SqrshrnS, + Arm64SqrshrnV, + Arm64SqrshrunS, + Arm64SqrshrunV, + Arm64SqshluS, + Arm64SqshluV, + Arm64SqshlSi, + Arm64SqshlVi, + Arm64SqshlS, + Arm64SqshlV, + Arm64SqshrnS, + Arm64SqshrnV, + Arm64SqshrunS, + Arm64SqshrunV, + Arm64SqsubS, + Arm64SqsubV, + Arm64SqxtnS, + Arm64SqxtnV, + Arm64SqxtunS, + Arm64SqxtunV, + Arm64SrhaddV, + Arm64SriS, + Arm64SriV, + Arm64SrshlS, + Arm64SrshlV, + Arm64SrshrS, + Arm64SrshrV, + Arm64SrsraS, + Arm64SrsraV, + Arm64SshllV, + Arm64SshlS, + Arm64SshlV, + Arm64SshrS, + Arm64SshrV, + Arm64SsraS, + Arm64SsraV, + Arm64SsublV, + Arm64SsubwV, + Arm64St1Vms, + Arm64St1Vss, + Arm64St2Vms, + Arm64St2Vss, + Arm64St3Vms, + Arm64St3Vss, + Arm64St4Vms, + Arm64St4Vss, + Arm64SubhnV, + Arm64SubS, + Arm64SubV, + Arm64SuqaddS, + Arm64SuqaddV, + Arm64TblV, + Arm64TbxV, + Arm64Trn1V, + Arm64Trn2V, + Arm64UabalV, + Arm64UabaV, + Arm64UabdlV, + Arm64UabdV, + Arm64UadalpV, + Arm64UaddlpV, + Arm64UaddlvV, + Arm64UaddlV, + Arm64UaddwV, + Arm64UcvtfSFixed, + Arm64UcvtfVFixed, + Arm64UcvtfS, + Arm64UcvtfV, + Arm64UcvtfGpFixed, + Arm64UcvtfGp, + Arm64UhaddV, + Arm64UhsubV, + Arm64UmaxpV, + Arm64UmaxvV, + Arm64UmaxV, + Arm64UminpV, + Arm64UminvV, + Arm64UminV, + Arm64UmlalVe, + Arm64UmlalV, + Arm64UmlslVe, + Arm64UmlslV, + Arm64UmovV, + Arm64UmullVe, + Arm64UmullV, + Arm64UqaddS, + Arm64UqaddV, + Arm64UqrshlS, + Arm64UqrshlV, + Arm64UqrshrnS, + Arm64UqrshrnV, + Arm64UqshlSi, + Arm64UqshlVi, + Arm64UqshlS, + Arm64UqshlV, + Arm64UqshrnS, + Arm64UqshrnV, + Arm64UqsubS, + Arm64UqsubV, + Arm64UqxtnS, + Arm64UqxtnV, + Arm64UrecpeV, + Arm64UrhaddV, + Arm64UrshlS, + Arm64UrshlV, + Arm64UrshrS, + Arm64UrshrV, + Arm64UrsqrteV, + Arm64UrsraS, + Arm64UrsraV, + Arm64UshllV, + Arm64UshlS, + Arm64UshlV, + Arm64UshrS, + Arm64UshrV, + Arm64UsqaddS, + Arm64UsqaddV, + Arm64UsraS, + Arm64UsraV, + Arm64UsublV, + Arm64UsubwV, + Arm64Uzp1V, + Arm64Uzp2V, + Arm64XtnV, + Arm64Zip1V, + Arm64Zip2V, + + Arm64VTypeShift = 13, + Arm64VTypeMask = 1 << Arm64VTypeShift, + Arm64V64 = 0 << Arm64VTypeShift, + Arm64V128 = 1 << Arm64VTypeShift, + + Arm64VSizeShift = 14, + Arm64VSizeMask = 3 << Arm64VSizeShift, + Arm64VFloat = 0 << Arm64VSizeShift, + Arm64VDouble = 1 << Arm64VSizeShift, + Arm64VByte = 0 << Arm64VSizeShift, + Arm64VHWord = 1 << Arm64VSizeShift, + Arm64VWord = 2 << Arm64VSizeShift, + Arm64VDWord = 3 << Arm64VSizeShift + } +}
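The trailing members of the Intrinsic enum are not opcodes but flag bits packed into the same ushort: bit 13 selects a 64- or 128-bit vector and bits 14-15 the element type or size, so a single value carries both the instruction and its arrangement. A sketch of composing and decomposing such a value, assuming the layout declared above (this only compiles inside the ARMeilleure assembly, where the enum is visible):

```csharp
// Compose: opcode OR'd with arrangement flags, e.g. a 2-double 128-bit fadd.
Intrinsic intrin = Intrinsic.Arm64FaddV | Intrinsic.Arm64V128 | Intrinsic.Arm64VDouble;

// Decompose: mask the flags back out.
Intrinsic opcode = intrin & ~(Intrinsic.Arm64VTypeMask | Intrinsic.Arm64VSizeMask);
bool is128Bit = (intrin & Intrinsic.Arm64VTypeMask) == Intrinsic.Arm64V128;
uint sizeField = (uint)(intrin & Intrinsic.Arm64VSizeMask) >> (int)Intrinsic.Arm64VSizeShift; // 1 => double
```

This works because the opcode ids themselves stay below 1 << 13, leaving the top three bits of the ushort free for the flags.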
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/IntrusiveList.cs b/src/ARMeilleure/IntermediateRepresentation/IntrusiveList.cs new file mode 100644 index 00000000..184df87c --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/IntrusiveList.cs @@ -0,0 +1,208 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace ARMeilleure.IntermediateRepresentation +{ + /// <summary> + /// Represents an efficient linked list that stores the pointers on the object directly and does not allocate. + /// </summary> + /// <typeparam name="T">Type of the list items</typeparam> + class IntrusiveList<T> where T : IEquatable<T>, IIntrusiveListNode<T> + { + /// <summary> + /// First item of the list, or null if empty. + /// </summary> + public T First { get; private set; } + + /// <summary> + /// Last item of the list, or null if empty. + /// </summary> + public T Last { get; private set; } + + /// <summary> + /// Total number of items on the list. + /// </summary> + public int Count { get; private set; } + + /// <summary> + /// Initializes a new instance of the <see cref="IntrusiveList{T}"/> class. + /// </summary> + /// <exception cref="ArgumentException"><typeparamref name="T"/> is not pointer sized.</exception> + public IntrusiveList() + { + if (Unsafe.SizeOf<T>() != IntPtr.Size) + { + throw new ArgumentException("T must be a reference type or a pointer sized struct."); + } + } + + /// <summary> + /// Adds an item as the first item of the list. + /// </summary> + /// <param name="newNode">Item to be added</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public T AddFirst(T newNode) + { + if (!EqualsNull(First)) + { + return AddBefore(First, newNode); + } + else + { + Debug.Assert(EqualsNull(newNode.ListPrevious)); + Debug.Assert(EqualsNull(newNode.ListNext)); + Debug.Assert(EqualsNull(Last)); + + First = newNode; + Last = newNode; + + Debug.Assert(Count == 0); + + Count = 1; + + return newNode; + } + } + + /// <summary> + /// Adds an item as the last item of the list. + /// </summary> + /// <param name="newNode">Item to be added</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public T AddLast(T newNode) + { + if (!EqualsNull(Last)) + { + return AddAfter(Last, newNode); + } + else + { + Debug.Assert(EqualsNull(newNode.ListPrevious)); + Debug.Assert(EqualsNull(newNode.ListNext)); + Debug.Assert(EqualsNull(First)); + + First = newNode; + Last = newNode; + + Debug.Assert(Count == 0); + + Count = 1; + + return newNode; + } + } + + /// <summary> + /// Adds an item before an existing item on the list. + /// </summary> + /// <param name="node">Item on the list that will succeed the new item</param> + /// <param name="newNode">Item to be added</param> + /// <returns>New item</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public T AddBefore(T node, T newNode) + { + Debug.Assert(EqualsNull(newNode.ListPrevious)); + Debug.Assert(EqualsNull(newNode.ListNext)); + + newNode.ListPrevious = node.ListPrevious; + newNode.ListNext = node; + + node.ListPrevious = newNode; + + if (!EqualsNull(newNode.ListPrevious)) + { + newNode.ListPrevious.ListNext = newNode; + } + + if (Equals(First, node)) + { + First = newNode; + } + + Count++; + + return newNode; + } + + /// <summary> + /// Adds an item after an existing item on the list. 
+ /// </summary> + /// <param name="node">Item on the list that will precede the new item</param> + /// <param name="newNode">Item to be added</param> + /// <returns>New item</returns> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public T AddAfter(T node, T newNode) + { + Debug.Assert(EqualsNull(newNode.ListPrevious)); + Debug.Assert(EqualsNull(newNode.ListNext)); + + newNode.ListPrevious = node; + newNode.ListNext = node.ListNext; + + node.ListNext = newNode; + + if (!EqualsNull(newNode.ListNext)) + { + newNode.ListNext.ListPrevious = newNode; + } + + if (Equals(Last, node)) + { + Last = newNode; + } + + Count++; + + return newNode; + } + + /// <summary> + /// Removes an item from the list. + /// </summary> + /// <param name="node">The item to be removed</param> + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Remove(T node) + { + if (!EqualsNull(node.ListPrevious)) + { + node.ListPrevious.ListNext = node.ListNext; + } + else + { + Debug.Assert(Equals(First, node)); + + First = node.ListNext; + } + + if (!EqualsNull(node.ListNext)) + { + node.ListNext.ListPrevious = node.ListPrevious; + } + else + { + Debug.Assert(Equals(Last, node)); + + Last = node.ListPrevious; + } + + node.ListPrevious = default; + node.ListNext = default; + + Count--; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool EqualsNull(T a) + { + return EqualityComparer<T>.Default.Equals(a, default); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool Equals(T a, T b) + { + return EqualityComparer<T>.Default.Equals(a, b); + } + } +} diff --git a/src/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs b/src/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs new file mode 100644 index 00000000..07d2633b --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/MemoryOperand.cs @@ -0,0 +1,54 @@ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace ARMeilleure.IntermediateRepresentation +{ + unsafe struct MemoryOperand + { + private struct Data + { +#pragma warning disable CS0649 + public byte Kind; + public byte Type; +#pragma warning restore CS0649 + public byte Scale; + public Operand BaseAddress; + public Operand Index; + public int Displacement; + } + + private Data* _data; + + public MemoryOperand(Operand operand) + { + Debug.Assert(operand.Kind == OperandKind.Memory); + + _data = (Data*)Unsafe.As<Operand, IntPtr>(ref operand); + } + + public Operand BaseAddress + { + get => _data->BaseAddress; + set => _data->BaseAddress = value; + } + + public Operand Index + { + get => _data->Index; + set => _data->Index = value; + } + + public Multiplier Scale + { + get => (Multiplier)_data->Scale; + set => _data->Scale = (byte)value; + } + + public int Displacement + { + get => _data->Displacement; + set => _data->Displacement = value; + } + } +}
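A usage sketch for the IntrusiveList<T> defined above. The `Node` type is hypothetical, written only to satisfy the constraints: it is a reference type (so `Unsafe.SizeOf<T>() == IntPtr.Size`) and carries the two link properties itself, which is what makes Add/Remove allocation-free:

```csharp
using System;

class Node : IEquatable<Node>, IIntrusiveListNode<Node>
{
    public int Value;
    public Node ListPrevious { get; set; }
    public Node ListNext { get; set; }
    public Node(int value) => Value = value;
    public bool Equals(Node other) => ReferenceEquals(this, other); // identity is enough here
}

static class IntrusiveListDemo
{
    static void Main()
    {
        var list = new IntrusiveList<Node>();
        Node a = list.AddLast(new Node(1));
        Node b = list.AddLast(new Node(2));
        list.AddBefore(b, new Node(3));   // list: 1, 3, 2
        list.Remove(a);                   // list: 3, 2

        // Walking the list allocates no enumerator:
        for (Node n = list.First; n != null; n = n.ListNext)
        {
            Console.Write(n.Value);       // prints "32"
        }
    }
}
```

Because the links live on the node rather than in the list, a node can belong to at most one list at a time; the Debug.Asserts in AddFirst/AddBefore/AddAfter enforce that its links are clear before insertion.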
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/Multiplier.cs b/src/ARMeilleure/IntermediateRepresentation/Multiplier.cs new file mode 100644 index 00000000..d6bc7d99 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/Multiplier.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum Multiplier + { + x1 = 0, + x2 = 1, + x4 = 2, + x8 = 3, + x16 = 4 + } +}
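Multiplier stores the log2 of the index scale (x1..x16 map to shift amounts 0..4), so scaling an index is a single shift when computing an effective address from a MemoryOperand's fields. A hypothetical helper, just to make the encoding concrete:

```csharp
static ulong EffectiveAddress(ulong baseAddress, ulong index, Multiplier scale, int displacement)
{
    // (int)Multiplier.x8 == 3, so index << 3 == index * 8, and so on.
    return baseAddress + (index << (int)scale) + (ulong)(long)displacement;
}
```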
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/Operand.cs b/src/ARMeilleure/IntermediateRepresentation/Operand.cs new file mode 100644 index 00000000..9e8de3ba --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/Operand.cs @@ -0,0 +1,594 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.Common; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace ARMeilleure.IntermediateRepresentation +{ + unsafe struct Operand : IEquatable<Operand> + { + internal struct Data + { + public byte Kind; + public byte Type; + public byte SymbolType; + public byte Padding; // Unused space. + public ushort AssignmentsCount; + public ushort AssignmentsCapacity; + public uint UsesCount; + public uint UsesCapacity; + public Operation* Assignments; + public Operation* Uses; + public ulong Value; + public ulong SymbolValue; + } + + private Data* _data; + + public OperandKind Kind + { + get => (OperandKind)_data->Kind; + private set => _data->Kind = (byte)value; + } + + public OperandType Type + { + get => (OperandType)_data->Type; + private set => _data->Type = (byte)value; + } + + public ulong Value + { + get => _data->Value; + private set => _data->Value = value; + } + + public Symbol Symbol + { + get + { + Debug.Assert(Kind != OperandKind.Memory); + + return new Symbol((SymbolType)_data->SymbolType, _data->SymbolValue); + } + private set + { + Debug.Assert(Kind != OperandKind.Memory); + + if (value.Type == SymbolType.None) + { + _data->SymbolType = (byte)SymbolType.None; + } + else + { + _data->SymbolType = (byte)value.Type; + _data->SymbolValue = value.Value; + } + } + } + + public ReadOnlySpan<Operation> Assignments + { + get + { + Debug.Assert(Kind != OperandKind.Memory); + + return new ReadOnlySpan<Operation>(_data->Assignments, _data->AssignmentsCount); + } + } + + public ReadOnlySpan<Operation> Uses + { + get + { + Debug.Assert(Kind != OperandKind.Memory); + + return new ReadOnlySpan<Operation>(_data->Uses, (int)_data->UsesCount); + } + } + + public int UsesCount => (int)_data->UsesCount; + public int AssignmentsCount => _data->AssignmentsCount; + + public bool Relocatable => Symbol.Type != SymbolType.None; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public Register GetRegister() + { + Debug.Assert(Kind == OperandKind.Register); + + return new Register((int)Value & 0xffffff, (RegisterType)(Value >> 24)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public MemoryOperand GetMemory() + { + Debug.Assert(Kind == OperandKind.Memory); + + return new MemoryOperand(this); + } + + public int GetLocalNumber() + { + Debug.Assert(Kind == OperandKind.LocalVariable); + + return (int)Value; + } + + public byte AsByte() + { + return (byte)Value; + } + + public short AsInt16() + { + return (short)Value; + } + + public int AsInt32() + { + return (int)Value; + } + + public long AsInt64() + { + return (long)Value; + } + + public float AsFloat() + { + return BitConverter.Int32BitsToSingle((int)Value); + } + + public double AsDouble() + { + return BitConverter.Int64BitsToDouble((long)Value); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal ref ulong GetValueUnsafe() + { + return ref _data->Value; + } + + internal void NumberLocal(int number) + { + if (Kind != OperandKind.LocalVariable) + { + throw new InvalidOperationException("The operand is not a local variable."); + } + + Value = (ulong)number; + } + + public void AddAssignment(Operation operation) 
+ { + if (Kind == OperandKind.LocalVariable) + { + Add(operation, ref _data->Assignments, ref _data->AssignmentsCount, ref _data->AssignmentsCapacity); + } + else if (Kind == OperandKind.Memory) + { + MemoryOperand memOp = GetMemory(); + Operand addr = memOp.BaseAddress; + Operand index = memOp.Index; + + if (addr != default) + { + Add(operation, ref addr._data->Assignments, ref addr._data->AssignmentsCount, ref addr._data->AssignmentsCapacity); + } + + if (index != default) + { + Add(operation, ref index._data->Assignments, ref index._data->AssignmentsCount, ref index._data->AssignmentsCapacity); + } + } + } + + public void RemoveAssignment(Operation operation) + { + if (Kind == OperandKind.LocalVariable) + { + Remove(operation, ref _data->Assignments, ref _data->AssignmentsCount); + } + else if (Kind == OperandKind.Memory) + { + MemoryOperand memOp = GetMemory(); + Operand addr = memOp.BaseAddress; + Operand index = memOp.Index; + + if (addr != default) + { + Remove(operation, ref addr._data->Assignments, ref addr._data->AssignmentsCount); + } + + if (index != default) + { + Remove(operation, ref index._data->Assignments, ref index._data->AssignmentsCount); + } + } + } + + public void AddUse(Operation operation) + { + if (Kind == OperandKind.LocalVariable) + { + Add(operation, ref _data->Uses, ref _data->UsesCount, ref _data->UsesCapacity); + } + else if (Kind == OperandKind.Memory) + { + MemoryOperand memOp = GetMemory(); + Operand addr = memOp.BaseAddress; + Operand index = memOp.Index; + + if (addr != default) + { + Add(operation, ref addr._data->Uses, ref addr._data->UsesCount, ref addr._data->UsesCapacity); + } + + if (index != default) + { + Add(operation, ref index._data->Uses, ref index._data->UsesCount, ref index._data->UsesCapacity); + } + } + } + + public void RemoveUse(Operation operation) + { + if (Kind == OperandKind.LocalVariable) + { + Remove(operation, ref _data->Uses, ref _data->UsesCount); + } + else if (Kind == OperandKind.Memory) + { + MemoryOperand memOp = GetMemory(); + Operand addr = memOp.BaseAddress; + Operand index = memOp.Index; + + if (addr != default) + { + Remove(operation, ref addr._data->Uses, ref addr._data->UsesCount); + } + + if (index != default) + { + Remove(operation, ref index._data->Uses, ref index._data->UsesCount); + } + } + } + + public Span<Operation> GetUses(ref Span<Operation> buffer) + { + ReadOnlySpan<Operation> uses = Uses; + + if (buffer.Length < uses.Length) + { + buffer = Allocators.Default.AllocateSpan<Operation>((uint)uses.Length); + } + + uses.CopyTo(buffer); + + return buffer.Slice(0, uses.Length); + } + + private static void New<T>(ref T* data, ref ushort count, ref ushort capacity, ushort initialCapacity) where T : unmanaged + { + count = 0; + capacity = initialCapacity; + data = Allocators.References.Allocate<T>(initialCapacity); + } + + private static void New<T>(ref T* data, ref uint count, ref uint capacity, uint initialCapacity) where T : unmanaged + { + count = 0; + capacity = initialCapacity; + data = Allocators.References.Allocate<T>(initialCapacity); + } + + private static void Add<T>(T item, ref T* data, ref ushort count, ref ushort capacity) where T : unmanaged + { + if (count < capacity) + { + data[(uint)count++] = item; + + return; + } + + // Could not add item in the fast path, fall back to the slow path. 
+ ExpandAdd(item, ref data, ref count, ref capacity); + + static void ExpandAdd(T item, ref T* data, ref ushort count, ref ushort capacity) + { + ushort newCount = checked((ushort)(count + 1)); + ushort newCapacity = (ushort)Math.Min(capacity * 2, ushort.MaxValue); + + var oldSpan = new Span<T>(data, count); + + capacity = newCapacity; + data = Allocators.References.Allocate<T>(capacity); + + oldSpan.CopyTo(new Span<T>(data, count)); + + data[count] = item; + count = newCount; + } + } + + private static void Add<T>(T item, ref T* data, ref uint count, ref uint capacity) where T : unmanaged + { + if (count < capacity) + { + data[count++] = item; + + return; + } + + // Could not add item in the fast path, fall back to the slow path. + ExpandAdd(item, ref data, ref count, ref capacity); + + static void ExpandAdd(T item, ref T* data, ref uint count, ref uint capacity) + { + uint newCount = checked(count + 1); + uint newCapacity = (uint)Math.Min(capacity * 2, int.MaxValue); + + if (newCapacity <= capacity) + { + throw new OverflowException(); + } + + var oldSpan = new Span<T>(data, (int)count); + + capacity = newCapacity; + data = Allocators.References.Allocate<T>(capacity); + + oldSpan.CopyTo(new Span<T>(data, (int)count)); + + data[count] = item; + count = newCount; + } + } + + private static void Remove<T>(in T item, ref T* data, ref ushort count) where T : unmanaged + { + var span = new Span<T>(data, count); + + for (int i = 0; i < span.Length; i++) + { + if (EqualityComparer<T>.Default.Equals(span[i], item)) + { + if (i + 1 < count) + { + span.Slice(i + 1).CopyTo(span.Slice(i)); + } + + count--; + + return; + } + } + } + + private static void Remove<T>(in T item, ref T* data, ref uint count) where T : unmanaged + { + var span = new Span<T>(data, (int)count); + + for (int i = 0; i < span.Length; i++) + { + if (EqualityComparer<T>.Default.Equals(span[i], item)) + { + if (i + 1 < count) + { + span.Slice(i + 1).CopyTo(span.Slice(i)); + } + + count--; + + return; + } + } + } + + public override int GetHashCode() + { + return ((ulong)_data).GetHashCode(); + } + + public bool Equals(Operand operand) + { + return operand._data == _data; + } + + public override bool Equals(object obj) + { + return obj is Operand operand && Equals(operand); + } + + public static bool operator ==(Operand a, Operand b) + { + return a.Equals(b); + } + + public static bool operator !=(Operand a, Operand b) + { + return !a.Equals(b); + } + + public static class Factory + { + private const int InternTableSize = 256; + private const int InternTableProbeLength = 8; + + [ThreadStatic] + private static Data* _internTable; + + private static Data* InternTable + { + get + { + if (_internTable == null) + { + _internTable = (Data*)NativeAllocator.Instance.Allocate((uint)sizeof(Data) * InternTableSize); + + // Make sure the table is zeroed. + new Span<Data>(_internTable, InternTableSize).Clear(); + } + + return _internTable; + } + } + + private static Operand Make(OperandKind kind, OperandType type, ulong value, Symbol symbol = default) + { + Debug.Assert(kind != OperandKind.None); + + Data* data = null; + + // If constant or register, then try to look up in the intern table before allocating. + if (kind == OperandKind.Constant || kind == OperandKind.Register) + { + uint hash = (uint)HashCode.Combine(kind, type, value); + + // Look in the next InternTableProbeLength slots for a match. 
+ for (uint i = 0; i < InternTableProbeLength; i++) + { + Operand interned = new(); + interned._data = &InternTable[(hash + i) % InternTableSize]; + + // If slot matches the allocation request then return that slot. + if (interned.Kind == kind && interned.Type == type && interned.Value == value && interned.Symbol == symbol) + { + return interned; + } + // Otherwise if the slot is not occupied, we store in that slot. + else if (interned.Kind == OperandKind.None) + { + data = interned._data; + + break; + } + } + } + + // If we could not get a slot from the intern table, we allocate somewhere else and store there. + if (data == null) + { + data = Allocators.Operands.Allocate<Data>(); + } + + *data = default; + + Operand result = new(); + result._data = data; + result.Value = value; + result.Kind = kind; + result.Type = type; + + if (kind != OperandKind.Memory) + { + result.Symbol = symbol; + } + + // If local variable, then the use and def list is initialized with default sizes. + if (kind == OperandKind.LocalVariable) + { + New(ref result._data->Assignments, ref result._data->AssignmentsCount, ref result._data->AssignmentsCapacity, 1); + New(ref result._data->Uses, ref result._data->UsesCount, ref result._data->UsesCapacity, 4); + } + + return result; + } + + public static Operand Const(OperandType type, long value) + { + Debug.Assert(type is OperandType.I32 or OperandType.I64); + + return type == OperandType.I32 ? Const((int)value) : Const(value); + } + + public static Operand Const(bool value) + { + return Const(value ? 1 : 0); + } + + public static Operand Const(int value) + { + return Const((uint)value); + } + + public static Operand Const(uint value) + { + return Make(OperandKind.Constant, OperandType.I32, value); + } + + public static Operand Const(long value) + { + return Const(value, symbol: default); + } + + public static Operand Const<T>(ref T reference, Symbol symbol = default) + { + return Const((long)Unsafe.AsPointer(ref reference), symbol); + } + + public static Operand Const(long value, Symbol symbol) + { + return Make(OperandKind.Constant, OperandType.I64, (ulong)value, symbol); + } + + public static Operand Const(ulong value) + { + return Make(OperandKind.Constant, OperandType.I64, value); + } + + public static Operand ConstF(float value) + { + return Make(OperandKind.Constant, OperandType.FP32, (ulong)BitConverter.SingleToInt32Bits(value)); + } + + public static Operand ConstF(double value) + { + return Make(OperandKind.Constant, OperandType.FP64, (ulong)BitConverter.DoubleToInt64Bits(value)); + } + + public static Operand Label() + { + return Make(OperandKind.Label, OperandType.None, 0); + } + + public static Operand Local(OperandType type) + { + return Make(OperandKind.LocalVariable, type, 0); + } + + public static Operand Register(int index, RegisterType regType, OperandType type) + { + return Make(OperandKind.Register, type, (ulong)((int)regType << 24 | index)); + } + + public static Operand Undef() + { + return Make(OperandKind.Undefined, OperandType.None, 0); + } + + public static Operand MemoryOp( + OperandType type, + Operand baseAddress, + Operand index = default, + Multiplier scale = Multiplier.x1, + int displacement = 0) + { + Operand result = Make(OperandKind.Memory, type, 0); + + MemoryOperand memory = result.GetMemory(); + memory.BaseAddress = baseAddress; + memory.Index = index; + memory.Scale = scale; + memory.Displacement = displacement; + + return result; + } + } + } +}
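A consequence of the intern table in Operand.Factory above: constants and registers with the same bits usually hand back the very same backing `Data` slot, so operand equality (a `_data` pointer compare) behaves like value equality for them, while locals are always distinct. A sketch using the factory as declared; note the equality of the two constants can fail only in the rare case where all eight probe slots are already occupied by other values:

```csharp
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;

Operand a = Const(42);
Operand b = Const(42);
bool same = a == b;         // true in the common case: both alias one interned slot

Operand x = Local(OperandType.I64);
Operand y = Local(OperandType.I64);
bool distinct = x != y;     // always true: locals are never interned
```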
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/OperandKind.cs b/src/ARMeilleure/IntermediateRepresentation/OperandKind.cs new file mode 100644 index 00000000..adb83561 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/OperandKind.cs @@ -0,0 +1,13 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum OperandKind + { + None, + Constant, + Label, + LocalVariable, + Memory, + Register, + Undefined + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/OperandType.cs b/src/ARMeilleure/IntermediateRepresentation/OperandType.cs new file mode 100644 index 00000000..81b22cf5 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/OperandType.cs @@ -0,0 +1,65 @@ +using System; + +namespace ARMeilleure.IntermediateRepresentation +{ + enum OperandType + { + None, + I32, + I64, + FP32, + FP64, + V128 + } + + static class OperandTypeExtensions + { + public static bool IsInteger(this OperandType type) + { + return type == OperandType.I32 || + type == OperandType.I64; + } + + public static RegisterType ToRegisterType(this OperandType type) + { + switch (type) + { + case OperandType.FP32: return RegisterType.Vector; + case OperandType.FP64: return RegisterType.Vector; + case OperandType.I32: return RegisterType.Integer; + case OperandType.I64: return RegisterType.Integer; + case OperandType.V128: return RegisterType.Vector; + } + + throw new InvalidOperationException($"Invalid operand type \"{type}\"."); + } + + public static int GetSizeInBytes(this OperandType type) + { + switch (type) + { + case OperandType.FP32: return 4; + case OperandType.FP64: return 8; + case OperandType.I32: return 4; + case OperandType.I64: return 8; + case OperandType.V128: return 16; + } + + throw new InvalidOperationException($"Invalid operand type \"{type}\"."); + } + + public static int GetSizeInBytesLog2(this OperandType type) + { + switch (type) + { + case OperandType.FP32: return 2; + case OperandType.FP64: return 3; + case OperandType.I32: return 2; + case OperandType.I64: return 3; + case OperandType.V128: return 4; + } + + throw new InvalidOperationException($"Invalid operand type \"{type}\"."); + } + } +}
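GetSizeInBytes/GetSizeInBytesLog2 above exist so callers can do size and alignment arithmetic with shifts and masks. A hypothetical stack-slot helper showing the intended kind of use (this is not the register allocator's actual spill logic):

```csharp
static int AllocateSlot(ref int stackOffset, OperandType type)
{
    int size = 1 << type.GetSizeInBytesLog2();       // V128 => 1 << 4 == 16 bytes
    stackOffset = (stackOffset + size - 1) & -size;  // round up to natural alignment
    int slot = stackOffset;
    stackOffset += size;
    return slot;
}
```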
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/Operation.cs b/src/ARMeilleure/IntermediateRepresentation/Operation.cs new file mode 100644 index 00000000..c71e143c --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/Operation.cs @@ -0,0 +1,376 @@ +using System; +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace ARMeilleure.IntermediateRepresentation +{ + unsafe struct Operation : IEquatable<Operation>, IIntrusiveListNode<Operation> + { + internal struct Data + { + public ushort Instruction; + public ushort Intrinsic; + public ushort SourcesCount; + public ushort DestinationsCount; + public Operation ListPrevious; + public Operation ListNext; + public Operand* Destinations; + public Operand* Sources; + } + + private Data* _data; + + public Instruction Instruction + { + get => (Instruction)_data->Instruction; + private set => _data->Instruction = (ushort)value; + } + + public Intrinsic Intrinsic + { + get => (Intrinsic)_data->Intrinsic; + private set => _data->Intrinsic = (ushort)value; + } + + public Operation ListPrevious + { + get => _data->ListPrevious; + set => _data->ListPrevious = value; + } + + public Operation ListNext + { + get => _data->ListNext; + set => _data->ListNext = value; + } + + public Operand Destination + { + get => _data->DestinationsCount != 0 ? GetDestination(0) : default; + set => SetDestination(value); + } + + public int DestinationsCount => _data->DestinationsCount; + public int SourcesCount => _data->SourcesCount; + + internal Span<Operand> DestinationsUnsafe => new(_data->Destinations, _data->DestinationsCount); + internal Span<Operand> SourcesUnsafe => new(_data->Sources, _data->SourcesCount); + + public PhiOperation AsPhi() + { + Debug.Assert(Instruction == Instruction.Phi); + + return new PhiOperation(this); + } + + public Operand GetDestination(int index) + { + return DestinationsUnsafe[index]; + } + + public Operand GetSource(int index) + { + return SourcesUnsafe[index]; + } + + public void SetDestination(int index, Operand dest) + { + ref Operand curDest = ref DestinationsUnsafe[index]; + + RemoveAssignment(curDest); + AddAssignment(dest); + + curDest = dest; + } + + public void SetSource(int index, Operand src) + { + ref Operand curSrc = ref SourcesUnsafe[index]; + + RemoveUse(curSrc); + AddUse(src); + + curSrc = src; + } + + private void RemoveOldDestinations() + { + for (int i = 0; i < _data->DestinationsCount; i++) + { + RemoveAssignment(_data->Destinations[i]); + } + } + + public void SetDestination(Operand dest) + { + RemoveOldDestinations(); + + if (dest == default) + { + _data->DestinationsCount = 0; + } + else + { + EnsureCapacity(ref _data->Destinations, ref _data->DestinationsCount, 1); + + _data->Destinations[0] = dest; + + AddAssignment(dest); + } + } + + public void SetDestinations(Operand[] dests) + { + RemoveOldDestinations(); + + EnsureCapacity(ref _data->Destinations, ref _data->DestinationsCount, dests.Length); + + for (int index = 0; index < dests.Length; index++) + { + Operand newOp = dests[index]; + + _data->Destinations[index] = newOp; + + AddAssignment(newOp); + } + } + + private void RemoveOldSources() + { + for (int index = 0; index < _data->SourcesCount; index++) + { + RemoveUse(_data->Sources[index]); + } + } + + public void SetSource(Operand src) + { + RemoveOldSources(); + + if (src == default) + { + _data->SourcesCount = 0; + } + else + { + EnsureCapacity(ref _data->Sources, ref _data->SourcesCount, 1); + + _data->Sources[0] = src; + + AddUse(src); + 
} + } + + public void SetSources(Operand[] srcs) + { + RemoveOldSources(); + + EnsureCapacity(ref _data->Sources, ref _data->SourcesCount, srcs.Length); + + for (int index = 0; index < srcs.Length; index++) + { + Operand newOp = srcs[index]; + + _data->Sources[index] = newOp; + + AddUse(newOp); + } + } + + public void TurnIntoCopy(Operand source) + { + Instruction = Instruction.Copy; + + SetSource(source); + } + + private void AddAssignment(Operand op) + { + if (op != default) + { + op.AddAssignment(this); + } + } + + private void RemoveAssignment(Operand op) + { + if (op != default) + { + op.RemoveAssignment(this); + } + } + + private void AddUse(Operand op) + { + if (op != default) + { + op.AddUse(this); + } + } + + private void RemoveUse(Operand op) + { + if (op != default) + { + op.RemoveUse(this); + } + } + + public bool Equals(Operation operation) + { + return operation._data == _data; + } + + public override bool Equals(object obj) + { + return obj is Operation operation && Equals(operation); + } + + public override int GetHashCode() + { + return HashCode.Combine((IntPtr)_data); + } + + public static bool operator ==(Operation a, Operation b) + { + return a.Equals(b); + } + + public static bool operator !=(Operation a, Operation b) + { + return !a.Equals(b); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void EnsureCapacity(ref Operand* list, ref ushort capacity, int newCapacity) + { + if (newCapacity > ushort.MaxValue) + { + ThrowOverflow(newCapacity); + } + // We only need to allocate a new buffer if we're increasing the size. + else if (newCapacity > capacity) + { + list = Allocators.References.Allocate<Operand>((uint)newCapacity); + } + + capacity = (ushort)newCapacity; + } + + private static void ThrowOverflow(int count) => + throw new OverflowException($"Exceeded maximum size for Source or Destinations. 
Required {count}."); + + public static class Factory + { + private static Operation Make(Instruction inst, int destCount, int srcCount) + { + Data* data = Allocators.Operations.Allocate<Data>(); + *data = default; + + Operation result = new(); + result._data = data; + result.Instruction = inst; + + EnsureCapacity(ref result._data->Destinations, ref result._data->DestinationsCount, destCount); + EnsureCapacity(ref result._data->Sources, ref result._data->SourcesCount, srcCount); + + result.DestinationsUnsafe.Clear(); + result.SourcesUnsafe.Clear(); + + return result; + } + + public static Operation Operation(Instruction inst, Operand dest) + { + Operation result = Make(inst, 0, 0); + result.SetDestination(dest); + return result; + } + + public static Operation Operation(Instruction inst, Operand dest, Operand src0) + { + Operation result = Make(inst, 0, 1); + result.SetDestination(dest); + result.SetSource(0, src0); + return result; + } + + public static Operation Operation(Instruction inst, Operand dest, Operand src0, Operand src1) + { + Operation result = Make(inst, 0, 2); + result.SetDestination(dest); + result.SetSource(0, src0); + result.SetSource(1, src1); + return result; + } + + public static Operation Operation(Instruction inst, Operand dest, Operand src0, Operand src1, Operand src2) + { + Operation result = Make(inst, 0, 3); + result.SetDestination(dest); + result.SetSource(0, src0); + result.SetSource(1, src1); + result.SetSource(2, src2); + return result; + } + + public static Operation Operation(Instruction inst, Operand dest, int srcCount) + { + Operation result = Make(inst, 0, srcCount); + result.SetDestination(dest); + return result; + } + + public static Operation Operation(Instruction inst, Operand dest, Operand[] srcs) + { + Operation result = Make(inst, 0, srcs.Length); + + result.SetDestination(dest); + + for (int index = 0; index < srcs.Length; index++) + { + result.SetSource(index, srcs[index]); + } + + return result; + } + + public static Operation Operation(Intrinsic intrin, Operand dest, params Operand[] srcs) + { + Operation result = Make(Instruction.Extended, 0, srcs.Length); + + result.Intrinsic = intrin; + result.SetDestination(dest); + + for (int index = 0; index < srcs.Length; index++) + { + result.SetSource(index, srcs[index]); + } + + return result; + } + + public static Operation Operation(Instruction inst, Operand[] dests, Operand[] srcs) + { + Operation result = Make(inst, dests.Length, srcs.Length); + + for (int index = 0; index < dests.Length; index++) + { + result.SetDestination(index, dests[index]); + } + + for (int index = 0; index < srcs.Length; index++) + { + result.SetSource(index, srcs[index]); + } + + return result; + } + + public static Operation PhiOperation(Operand dest, int srcCount) + { + return Operation(Instruction.Phi, dest, srcCount * 2); + } + } + } +}
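A sketch of how the two factories compose when emitting IR, in the static-import style the code base uses elsewhere (`block` stands for some existing BasicBlock). Note that SetDestination/SetSource inside the factory do double duty: besides storing the operand they also maintain its def/use lists:

```csharp
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

Operand a = Local(OperandType.I64);
Operand b = Local(OperandType.I64);
Operand d = Local(OperandType.I64);

// d = a + b; also records the use on a and b and the assignment on d.
Operation add = Operation(Instruction.Add, d, a, b);

block.Append(add); // lands before the block's terminator, if it already has one
```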
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/PhiOperation.cs b/src/ARMeilleure/IntermediateRepresentation/PhiOperation.cs new file mode 100644 index 00000000..d2a3cf21 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/PhiOperation.cs @@ -0,0 +1,37 @@ +using ARMeilleure.Translation; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.IntermediateRepresentation +{ + readonly struct PhiOperation + { + private readonly Operation _operation; + + public PhiOperation(Operation operation) + { + _operation = operation; + } + + public int SourcesCount => _operation.SourcesCount / 2; + + public BasicBlock GetBlock(ControlFlowGraph cfg, int index) + { + return cfg.PostOrderBlocks[cfg.PostOrderMap[_operation.GetSource(index * 2).AsInt32()]]; + } + + public void SetBlock(int index, BasicBlock block) + { + _operation.SetSource(index * 2, Const(block.Index)); + } + + public Operand GetSource(int index) + { + return _operation.GetSource(index * 2 + 1); + } + + public void SetSource(int index, Operand operand) + { + _operation.SetSource(index * 2 + 1, operand); + } + } +} diff --git a/src/ARMeilleure/IntermediateRepresentation/Register.cs b/src/ARMeilleure/IntermediateRepresentation/Register.cs new file mode 100644 index 00000000..241e4d13 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/Register.cs @@ -0,0 +1,43 @@ +using System; + +namespace ARMeilleure.IntermediateRepresentation +{ + readonly struct Register : IEquatable<Register> + { + public int Index { get; } + + public RegisterType Type { get; } + + public Register(int index, RegisterType type) + { + Index = index; + Type = type; + } + + public override int GetHashCode() + { + return (ushort)Index | ((int)Type << 16); + } + + public static bool operator ==(Register x, Register y) + { + return x.Equals(y); + } + + public static bool operator !=(Register x, Register y) + { + return !x.Equals(y); + } + + public override bool Equals(object obj) + { + return obj is Register reg && Equals(reg); + } + + public bool Equals(Register other) + { + return other.Index == Index && + other.Type == Type; + } + } +}
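PhiOperation above is only a view: the underlying operation stores its sources as interleaved (block, value) pairs, which is why `Operation.Factory.PhiOperation(dest, srcCount)` allocates `srcCount * 2` source slots. A sketch under that layout (`dest`, `pred0`, `pred1` and the value operands are placeholders):

```csharp
using static ARMeilleure.IntermediateRepresentation.Operation.Factory;

// Merging two predecessors: raw sources are [block0, value0, block1, value1].
Operation op = PhiOperation(dest, srcCount: 2);
PhiOperation phi = op.AsPhi();

phi.SetBlock(0, pred0);           // raw source 0: Const(pred0.Index)
phi.SetSource(0, valueFromPred0); // raw source 1
phi.SetBlock(1, pred1);           // raw source 2
phi.SetSource(1, valueFromPred1); // raw source 3
```

Storing the block as `Const(block.Index)` keeps everything inside the normal Operand machinery; GetBlock later resolves the index back through the CFG's post-order map.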
\ No newline at end of file diff --git a/src/ARMeilleure/IntermediateRepresentation/RegisterType.cs b/src/ARMeilleure/IntermediateRepresentation/RegisterType.cs new file mode 100644 index 00000000..88ac6c12 --- /dev/null +++ b/src/ARMeilleure/IntermediateRepresentation/RegisterType.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.IntermediateRepresentation +{ + enum RegisterType + { + Integer, + Vector, + Flag, + FpFlag + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Memory/IJitMemoryAllocator.cs b/src/ARMeilleure/Memory/IJitMemoryAllocator.cs new file mode 100644 index 00000000..19b696b0 --- /dev/null +++ b/src/ARMeilleure/Memory/IJitMemoryAllocator.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Memory +{ + public interface IJitMemoryAllocator + { + IJitMemoryBlock Allocate(ulong size); + IJitMemoryBlock Reserve(ulong size); + + ulong GetPageSize(); + } +} diff --git a/src/ARMeilleure/Memory/IJitMemoryBlock.cs b/src/ARMeilleure/Memory/IJitMemoryBlock.cs new file mode 100644 index 00000000..670f2862 --- /dev/null +++ b/src/ARMeilleure/Memory/IJitMemoryBlock.cs @@ -0,0 +1,14 @@ +using System; + +namespace ARMeilleure.Memory +{ + public interface IJitMemoryBlock : IDisposable + { + IntPtr Pointer { get; } + + bool Commit(ulong offset, ulong size); + + void MapAsRx(ulong offset, ulong size); + void MapAsRwx(ulong offset, ulong size); + } +} diff --git a/src/ARMeilleure/Memory/IMemoryManager.cs b/src/ARMeilleure/Memory/IMemoryManager.cs new file mode 100644 index 00000000..5eb1fadd --- /dev/null +++ b/src/ARMeilleure/Memory/IMemoryManager.cs @@ -0,0 +1,77 @@ +using System; + +namespace ARMeilleure.Memory +{ + public interface IMemoryManager + { + int AddressSpaceBits { get; } + + IntPtr PageTablePointer { get; } + + MemoryManagerType Type { get; } + + event Action<ulong, ulong> UnmapEvent; + + /// <summary> + /// Reads data from CPU mapped memory. + /// </summary> + /// <typeparam name="T">Type of the data being read</typeparam> + /// <param name="va">Virtual address of the data in memory</param> + /// <returns>The data</returns> + T Read<T>(ulong va) where T : unmanaged; + + /// <summary> + /// Reads data from CPU mapped memory, with read tracking + /// </summary> + /// <typeparam name="T">Type of the data being read</typeparam> + /// <param name="va">Virtual address of the data in memory</param> + /// <returns>The data</returns> + T ReadTracked<T>(ulong va) where T : unmanaged; + + /// <summary> + /// Writes data to CPU mapped memory. + /// </summary> + /// <typeparam name="T">Type of the data being written</typeparam> + /// <param name="va">Virtual address to write the data into</param> + /// <param name="value">Data to be written</param> + void Write<T>(ulong va, T value) where T : unmanaged; + + /// <summary> + /// Gets a read-only span of data from CPU mapped memory. + /// </summary> + /// <param name="va">Virtual address of the data</param> + /// <param name="size">Size of the data</param> + /// <param name="tracked">True if read tracking is triggered on the span</param> + /// <returns>A read-only span of the data</returns> + ReadOnlySpan<byte> GetSpan(ulong va, int size, bool tracked = false); + + /// <summary> + /// Gets a reference for the given type at the specified virtual memory address. + /// </summary> + /// <remarks> + /// The data must be located at a contiguous memory region. + /// </remarks> + /// <typeparam name="T">Type of the data to get the reference</typeparam> + /// <param name="va">Virtual address of the data</param> + /// <returns>A reference to the data in memory</returns> + ref T GetRef<T>(ulong va) where T : unmanaged; + + /// <summary> + /// Checks if the page at a given CPU virtual address is mapped. + /// </summary> + /// <param name="va">Virtual address to check</param> + /// <returns>True if the address is mapped, false otherwise</returns> + bool IsMapped(ulong va); + + /// <summary> + /// Alerts the memory tracking that a given region has been read from or written to. 
+ /// This should be called before the read or write is performed.
+ /// </summary>
+ /// <param name="va">Virtual address of the region</param>
+ /// <param name="size">Size of the region</param>
+ /// <param name="write">True if the region is being written, false if read</param>
+ /// <param name="precise">True if the access is precise, false otherwise</param>
+ /// <param name="exemptId">Optional ID of the handle that should not be signalled</param>
+ void SignalMemoryTracking(ulong va, ulong size, bool write, bool precise = false, int? exemptId = null);
+ }
+}
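As a quick orientation for this interface, here is a minimal usage sketch. It is not part of the commit: `TryIncrementGuestCounter` is a hypothetical helper, and a concrete `IMemoryManager` implementation is assumed to be supplied by the embedder.

```csharp
using ARMeilleure.Memory;

static class MemoryManagerUsageSketch
{
    // Hypothetical helper, not part of this commit: bumps a 32-bit counter in
    // guest memory, signalling tracking before the access as the
    // SignalMemoryTracking documentation requires.
    public static bool TryIncrementGuestCounter(IMemoryManager memory, ulong va)
    {
        if (!memory.IsMapped(va))
        {
            return false;
        }

        // Tracking must be signalled before the actual read and write.
        memory.SignalMemoryTracking(va, sizeof(uint), write: true);

        uint value = memory.Read<uint>(va);
        memory.Write(va, value + 1);

        return true;
    }
}
```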
\ No newline at end of file diff --git a/src/ARMeilleure/Memory/InvalidAccessException.cs b/src/ARMeilleure/Memory/InvalidAccessException.cs new file mode 100644 index 00000000..ad540719 --- /dev/null +++ b/src/ARMeilleure/Memory/InvalidAccessException.cs @@ -0,0 +1,23 @@ +using System; + +namespace ARMeilleure.Memory +{ + class InvalidAccessException : Exception + { + public InvalidAccessException() + { + } + + public InvalidAccessException(ulong address) : base($"Invalid memory access at virtual address 0x{address:X16}.") + { + } + + public InvalidAccessException(string message) : base(message) + { + } + + public InvalidAccessException(string message, Exception innerException) : base(message, innerException) + { + } + } +} diff --git a/src/ARMeilleure/Memory/MemoryManagerType.cs b/src/ARMeilleure/Memory/MemoryManagerType.cs new file mode 100644 index 00000000..ce84ccaf --- /dev/null +++ b/src/ARMeilleure/Memory/MemoryManagerType.cs @@ -0,0 +1,41 @@ +namespace ARMeilleure.Memory +{ + /// <summary> + /// Indicates the type of a memory manager and the method it uses for memory mapping + /// and address translation. This controls the code generated for memory accesses on the JIT. + /// </summary> + public enum MemoryManagerType + { + /// <summary> + /// Complete software MMU implementation, the read/write methods are always called, + /// without any attempt to perform faster memory access. + /// </summary> + SoftwareMmu, + + /// <summary> + /// High level implementation using a software flat page table for address translation, + /// used to speed up address translation if possible without calling the read/write methods. + /// </summary> + SoftwarePageTable, + + /// <summary> + /// High level implementation with mappings managed by the host OS, effectively using hardware + /// page tables. No address translation is performed in software and the memory is just accessed directly. + /// </summary> + HostMapped, + + /// <summary> + /// Same as the host mapped memory manager type, but without masking the address within the address space. + /// Allows invalid access from JIT code to the rest of the program, but is faster. + /// </summary> + HostMappedUnsafe + } + + static class MemoryManagerTypeExtensions + { + public static bool IsHostMapped(this MemoryManagerType type) + { + return type == MemoryManagerType.HostMapped || type == MemoryManagerType.HostMappedUnsafe; + } + } +} diff --git a/src/ARMeilleure/Memory/ReservedRegion.cs b/src/ARMeilleure/Memory/ReservedRegion.cs new file mode 100644 index 00000000..2197afad --- /dev/null +++ b/src/ARMeilleure/Memory/ReservedRegion.cs @@ -0,0 +1,58 @@ +using System; + +namespace ARMeilleure.Memory +{ + class ReservedRegion + { + public const int DefaultGranularity = 65536; // Mapping granularity in Windows. + + public IJitMemoryBlock Block { get; } + + public IntPtr Pointer => Block.Pointer; + + private readonly ulong _maxSize; + private readonly ulong _sizeGranularity; + private ulong _currentSize; + + public ReservedRegion(IJitMemoryAllocator allocator, ulong maxSize, ulong granularity = 0) + { + if (granularity == 0) + { + granularity = DefaultGranularity; + } + + Block = allocator.Reserve(maxSize); + _maxSize = maxSize; + _sizeGranularity = granularity; + _currentSize = 0; + } + + public void ExpandIfNeeded(ulong desiredSize) + { + if (desiredSize > _maxSize) + { + throw new OutOfMemoryException(); + } + + if (desiredSize > _currentSize) + { + // Lock, and then check again. We only want to commit once. 
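+ // Double-checked locking: the unsynchronized check above filters the
+ // common case cheaply, and re-checking under the lock stops two racing
+ // threads from both committing the same range.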
+ lock (this) + { + if (desiredSize >= _currentSize) + { + ulong overflowBytes = desiredSize - _currentSize; + ulong moreToCommit = (((_sizeGranularity - 1) + overflowBytes) / _sizeGranularity) * _sizeGranularity; // Round up. + Block.Commit(_currentSize, moreToCommit); + _currentSize += moreToCommit; + } + } + } + } + + public void Dispose() + { + Block.Dispose(); + } + } +} diff --git a/src/ARMeilleure/Native/JitSupportDarwin.cs b/src/ARMeilleure/Native/JitSupportDarwin.cs new file mode 100644 index 00000000..7d6a8634 --- /dev/null +++ b/src/ARMeilleure/Native/JitSupportDarwin.cs @@ -0,0 +1,13 @@ +using System; +using System.Runtime.InteropServices; +using System.Runtime.Versioning; + +namespace ARMeilleure.Native +{ + [SupportedOSPlatform("macos")] + public static partial class JitSupportDarwin + { + [LibraryImport("libarmeilleure-jitsupport", EntryPoint = "armeilleure_jit_memcpy")] + public static partial void Copy(IntPtr dst, IntPtr src, ulong n); + } +} diff --git a/src/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib b/src/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib Binary files differnew file mode 100644 index 00000000..c65b0a4e --- /dev/null +++ b/src/ARMeilleure/Native/libs/libarmeilleure-jitsupport.dylib diff --git a/src/ARMeilleure/Native/macos_jit_support/Makefile b/src/ARMeilleure/Native/macos_jit_support/Makefile new file mode 100644 index 00000000..d6da35d5 --- /dev/null +++ b/src/ARMeilleure/Native/macos_jit_support/Makefile @@ -0,0 +1,8 @@ +NAME = libarmeilleure-jitsupport.dylib + +all: ${NAME} + +${NAME}: + clang -O3 -dynamiclib support.c -o ${NAME} +clean: + rm -f ${NAME} diff --git a/src/ARMeilleure/Native/macos_jit_support/support.c b/src/ARMeilleure/Native/macos_jit_support/support.c new file mode 100644 index 00000000..1b13d906 --- /dev/null +++ b/src/ARMeilleure/Native/macos_jit_support/support.c @@ -0,0 +1,14 @@ +#include <stddef.h> +#include <string.h> +#include <pthread.h> + +#include <libkern/OSCacheControl.h> + +void armeilleure_jit_memcpy(void *dst, const void *src, size_t n) { + pthread_jit_write_protect_np(0); + memcpy(dst, src, n); + pthread_jit_write_protect_np(1); + + // Ensure that the instruction cache for this range is invalidated. 
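+ // pthread_jit_write_protect_np only toggles the per-thread MAP_JIT W^X
+ // protection; it does not synchronize the instruction cache, so without
+ // the flush below the CPU could keep executing stale instructions.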
+ sys_icache_invalidate(dst, n); +} diff --git a/src/ARMeilleure/Optimizations.cs b/src/ARMeilleure/Optimizations.cs new file mode 100644 index 00000000..a84a4dc4 --- /dev/null +++ b/src/ARMeilleure/Optimizations.cs @@ -0,0 +1,68 @@ +using System.Runtime.Intrinsics.Arm; + +namespace ARMeilleure +{ + using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities; + using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities; + + public static class Optimizations + { + public static bool FastFP { get; set; } = true; + + public static bool AllowLcqInFunctionTable { get; set; } = true; + public static bool UseUnmanagedDispatchLoop { get; set; } = true; + + public static bool UseAdvSimdIfAvailable { get; set; } = true; + public static bool UseArm64PmullIfAvailable { get; set; } = true; + + public static bool UseSseIfAvailable { get; set; } = true; + public static bool UseSse2IfAvailable { get; set; } = true; + public static bool UseSse3IfAvailable { get; set; } = true; + public static bool UseSsse3IfAvailable { get; set; } = true; + public static bool UseSse41IfAvailable { get; set; } = true; + public static bool UseSse42IfAvailable { get; set; } = true; + public static bool UsePopCntIfAvailable { get; set; } = true; + public static bool UseAvxIfAvailable { get; set; } = true; + public static bool UseAvx512FIfAvailable { get; set; } = true; + public static bool UseAvx512VlIfAvailable { get; set; } = true; + public static bool UseAvx512BwIfAvailable { get; set; } = true; + public static bool UseAvx512DqIfAvailable { get; set; } = true; + public static bool UseF16cIfAvailable { get; set; } = true; + public static bool UseFmaIfAvailable { get; set; } = true; + public static bool UseAesniIfAvailable { get; set; } = true; + public static bool UsePclmulqdqIfAvailable { get; set; } = true; + public static bool UseShaIfAvailable { get; set; } = true; + public static bool UseGfniIfAvailable { get; set; } = true; + + public static bool ForceLegacySse + { + get => X86HardwareCapabilities.ForceLegacySse; + set => X86HardwareCapabilities.ForceLegacySse = value; + } + + internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd; + internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull; + + internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse; + internal static bool UseSse2 => UseSse2IfAvailable && X86HardwareCapabilities.SupportsSse2; + internal static bool UseSse3 => UseSse3IfAvailable && X86HardwareCapabilities.SupportsSse3; + internal static bool UseSsse3 => UseSsse3IfAvailable && X86HardwareCapabilities.SupportsSsse3; + internal static bool UseSse41 => UseSse41IfAvailable && X86HardwareCapabilities.SupportsSse41; + internal static bool UseSse42 => UseSse42IfAvailable && X86HardwareCapabilities.SupportsSse42; + internal static bool UsePopCnt => UsePopCntIfAvailable && X86HardwareCapabilities.SupportsPopcnt; + internal static bool UseAvx => UseAvxIfAvailable && X86HardwareCapabilities.SupportsAvx && !ForceLegacySse; + internal static bool UseAvx512F => UseAvx512FIfAvailable && X86HardwareCapabilities.SupportsAvx512F && !ForceLegacySse; + internal static bool UseAvx512Vl => UseAvx512VlIfAvailable && X86HardwareCapabilities.SupportsAvx512Vl && !ForceLegacySse; + internal static bool UseAvx512Bw => UseAvx512BwIfAvailable && X86HardwareCapabilities.SupportsAvx512Bw && !ForceLegacySse; + internal static bool UseAvx512Dq => UseAvx512DqIfAvailable && 
X86HardwareCapabilities.SupportsAvx512Dq && !ForceLegacySse; + internal static bool UseF16c => UseF16cIfAvailable && X86HardwareCapabilities.SupportsF16c; + internal static bool UseFma => UseFmaIfAvailable && X86HardwareCapabilities.SupportsFma; + internal static bool UseAesni => UseAesniIfAvailable && X86HardwareCapabilities.SupportsAesni; + internal static bool UsePclmulqdq => UsePclmulqdqIfAvailable && X86HardwareCapabilities.SupportsPclmulqdq; + internal static bool UseSha => UseShaIfAvailable && X86HardwareCapabilities.SupportsSha; + internal static bool UseGfni => UseGfniIfAvailable && X86HardwareCapabilities.SupportsGfni; + + internal static bool UseAvx512Ortho => UseAvx512F && UseAvx512Vl; + internal static bool UseAvx512OrthoFloat => UseAvx512Ortho && UseAvx512Dq; + } +} diff --git a/src/ARMeilleure/Signal/NativeSignalHandler.cs b/src/ARMeilleure/Signal/NativeSignalHandler.cs new file mode 100644 index 00000000..cddeb817 --- /dev/null +++ b/src/ARMeilleure/Signal/NativeSignalHandler.cs @@ -0,0 +1,422 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Memory; +using ARMeilleure.Translation; +using ARMeilleure.Translation.Cache; +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Signal +{ + [StructLayout(LayoutKind.Sequential, Pack = 1)] + struct SignalHandlerRange + { + public int IsActive; + public nuint RangeAddress; + public nuint RangeEndAddress; + public IntPtr ActionPointer; + } + + [StructLayout(LayoutKind.Sequential, Pack = 1)] + struct SignalHandlerConfig + { + /// <summary> + /// The byte offset of the faulting address in the SigInfo or ExceptionRecord struct. + /// </summary> + public int StructAddressOffset; + + /// <summary> + /// The byte offset of the write flag in the SigInfo or ExceptionRecord struct. + /// </summary> + public int StructWriteOffset; + + /// <summary> + /// The sigaction handler that was registered before this one. (unix only) + /// </summary> + public nuint UnixOldSigaction; + + /// <summary> + /// The type of the previous sigaction. True for the 3 argument variant. 
(unix only) + /// </summary> + public int UnixOldSigaction3Arg; + + public SignalHandlerRange Range0; + public SignalHandlerRange Range1; + public SignalHandlerRange Range2; + public SignalHandlerRange Range3; + public SignalHandlerRange Range4; + public SignalHandlerRange Range5; + public SignalHandlerRange Range6; + public SignalHandlerRange Range7; + } + + public static class NativeSignalHandler + { + private delegate void UnixExceptionHandler(int sig, IntPtr info, IntPtr ucontext); + [UnmanagedFunctionPointer(CallingConvention.Winapi)] + private delegate int VectoredExceptionHandler(IntPtr exceptionInfo); + + private const int MaxTrackedRanges = 8; + + private const int StructAddressOffset = 0; + private const int StructWriteOffset = 4; + private const int UnixOldSigaction = 8; + private const int UnixOldSigaction3Arg = 16; + private const int RangeOffset = 20; + + private const int EXCEPTION_CONTINUE_SEARCH = 0; + private const int EXCEPTION_CONTINUE_EXECUTION = -1; + + private const uint EXCEPTION_ACCESS_VIOLATION = 0xc0000005; + + private static ulong _pageSize; + private static ulong _pageMask; + + private static IntPtr _handlerConfig; + private static IntPtr _signalHandlerPtr; + private static IntPtr _signalHandlerHandle; + + private static readonly object _lock = new object(); + private static bool _initialized; + + static NativeSignalHandler() + { + _handlerConfig = Marshal.AllocHGlobal(Unsafe.SizeOf<SignalHandlerConfig>()); + ref SignalHandlerConfig config = ref GetConfigRef(); + + config = new SignalHandlerConfig(); + } + + public static void Initialize(IJitMemoryAllocator allocator) + { + JitCache.Initialize(allocator); + } + + public static void InitializeSignalHandler(ulong pageSize, Func<IntPtr, IntPtr, IntPtr> customSignalHandlerFactory = null) + { + if (_initialized) return; + + lock (_lock) + { + if (_initialized) return; + + _pageSize = pageSize; + _pageMask = pageSize - 1; + + ref SignalHandlerConfig config = ref GetConfigRef(); + + if (OperatingSystem.IsLinux() || OperatingSystem.IsMacOS()) + { + _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateUnixSignalHandler(_handlerConfig)); + + if (customSignalHandlerFactory != null) + { + _signalHandlerPtr = customSignalHandlerFactory(UnixSignalHandlerRegistration.GetSegfaultExceptionHandler().sa_handler, _signalHandlerPtr); + } + + var old = UnixSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr); + + config.UnixOldSigaction = (nuint)(ulong)old.sa_handler; + config.UnixOldSigaction3Arg = old.sa_flags & 4; + } + else + { + config.StructAddressOffset = 40; // ExceptionInformation1 + config.StructWriteOffset = 32; // ExceptionInformation0 + + _signalHandlerPtr = Marshal.GetFunctionPointerForDelegate(GenerateWindowsSignalHandler(_handlerConfig)); + + if (customSignalHandlerFactory != null) + { + _signalHandlerPtr = customSignalHandlerFactory(IntPtr.Zero, _signalHandlerPtr); + } + + _signalHandlerHandle = WindowsSignalHandlerRegistration.RegisterExceptionHandler(_signalHandlerPtr); + } + + _initialized = true; + } + } + + private static unsafe ref SignalHandlerConfig GetConfigRef() + { + return ref Unsafe.AsRef<SignalHandlerConfig>((void*)_handlerConfig); + } + + public static unsafe bool AddTrackedRegion(nuint address, nuint endAddress, IntPtr action) + { + var ranges = &((SignalHandlerConfig*)_handlerConfig)->Range0; + + for (int i = 0; i < MaxTrackedRanges; i++) + { + if (ranges[i].IsActive == 0) + { + ranges[i].RangeAddress = address; + ranges[i].RangeEndAddress = endAddress; + 
ranges[i].ActionPointer = action; + ranges[i].IsActive = 1; + + return true; + } + } + + return false; + } + + public static unsafe bool RemoveTrackedRegion(nuint address) + { + var ranges = &((SignalHandlerConfig*)_handlerConfig)->Range0; + + for (int i = 0; i < MaxTrackedRanges; i++) + { + if (ranges[i].IsActive == 1 && ranges[i].RangeAddress == address) + { + ranges[i].IsActive = 0; + + return true; + } + } + + return false; + } + + private static Operand EmitGenericRegionCheck(EmitterContext context, IntPtr signalStructPtr, Operand faultAddress, Operand isWrite) + { + Operand inRegionLocal = context.AllocateLocal(OperandType.I32); + context.Copy(inRegionLocal, Const(0)); + + Operand endLabel = Label(); + + for (int i = 0; i < MaxTrackedRanges; i++) + { + ulong rangeBaseOffset = (ulong)(RangeOffset + i * Unsafe.SizeOf<SignalHandlerRange>()); + + Operand nextLabel = Label(); + + Operand isActive = context.Load(OperandType.I32, Const((ulong)signalStructPtr + rangeBaseOffset)); + + context.BranchIfFalse(nextLabel, isActive); + + Operand rangeAddress = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 4)); + Operand rangeEndAddress = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 12)); + + // Is the fault address within this tracked region? + Operand inRange = context.BitwiseAnd( + context.ICompare(faultAddress, rangeAddress, Comparison.GreaterOrEqualUI), + context.ICompare(faultAddress, rangeEndAddress, Comparison.LessUI) + ); + + // Only call tracking if in range. + context.BranchIfFalse(nextLabel, inRange, BasicBlockFrequency.Cold); + + Operand offset = context.BitwiseAnd(context.Subtract(faultAddress, rangeAddress), Const(~_pageMask)); + + // Call the tracking action, with the pointer's relative offset to the base address. + Operand trackingActionPtr = context.Load(OperandType.I64, Const((ulong)signalStructPtr + rangeBaseOffset + 20)); + + context.Copy(inRegionLocal, Const(0)); + + Operand skipActionLabel = Label(); + + // Tracking action should be non-null to call it, otherwise assume false return. + context.BranchIfFalse(skipActionLabel, trackingActionPtr); + Operand result = context.Call(trackingActionPtr, OperandType.I32, offset, Const(_pageSize), isWrite); + context.Copy(inRegionLocal, result); + + context.MarkLabel(skipActionLabel); + + // If the tracking action returns false or does not exist, it might be an invalid access due to a partial overlap on Windows. + if (OperatingSystem.IsWindows()) + { + context.BranchIfTrue(endLabel, inRegionLocal); + + context.Copy(inRegionLocal, WindowsPartialUnmapHandler.EmitRetryFromAccessViolation(context)); + } + + context.Branch(endLabel); + + context.MarkLabel(nextLabel); + } + + context.MarkLabel(endLabel); + + return context.Copy(inRegionLocal); + } + + private static Operand GenerateUnixFaultAddress(EmitterContext context, Operand sigInfoPtr) + { + ulong structAddressOffset = OperatingSystem.IsMacOS() ? 
24ul : 16ul; // si_addr + return context.Load(OperandType.I64, context.Add(sigInfoPtr, Const(structAddressOffset))); + } + + private static Operand GenerateUnixWriteFlag(EmitterContext context, Operand ucontextPtr) + { + if (OperatingSystem.IsMacOS()) + { + const ulong mcontextOffset = 48; // uc_mcontext + Operand ctxPtr = context.Load(OperandType.I64, context.Add(ucontextPtr, Const(mcontextOffset))); + + if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + const ulong esrOffset = 8; // __es.__esr + Operand esr = context.Load(OperandType.I64, context.Add(ctxPtr, Const(esrOffset))); + return context.BitwiseAnd(esr, Const(0x40ul)); + } + + if (RuntimeInformation.ProcessArchitecture == Architecture.X64) + { + const ulong errOffset = 4; // __es.__err + Operand err = context.Load(OperandType.I64, context.Add(ctxPtr, Const(errOffset))); + return context.BitwiseAnd(err, Const(2ul)); + } + } + else if (OperatingSystem.IsLinux()) + { + if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + Operand auxPtr = context.AllocateLocal(OperandType.I64); + + Operand loopLabel = Label(); + Operand successLabel = Label(); + + const ulong auxOffset = 464; // uc_mcontext.__reserved + const uint esrMagic = 0x45535201; + + context.Copy(auxPtr, context.Add(ucontextPtr, Const(auxOffset))); + + context.MarkLabel(loopLabel); + + // _aarch64_ctx::magic + Operand magic = context.Load(OperandType.I32, auxPtr); + // _aarch64_ctx::size + Operand size = context.Load(OperandType.I32, context.Add(auxPtr, Const(4ul))); + + context.BranchIf(successLabel, magic, Const(esrMagic), Comparison.Equal); + + context.Copy(auxPtr, context.Add(auxPtr, context.ZeroExtend32(OperandType.I64, size))); + + context.Branch(loopLabel); + + context.MarkLabel(successLabel); + + // esr_context::esr + Operand esr = context.Load(OperandType.I64, context.Add(auxPtr, Const(8ul))); + return context.BitwiseAnd(esr, Const(0x40ul)); + } + + if (RuntimeInformation.ProcessArchitecture == Architecture.X64) + { + const int errOffset = 192; // uc_mcontext.gregs[REG_ERR] + Operand err = context.Load(OperandType.I64, context.Add(ucontextPtr, Const(errOffset))); + return context.BitwiseAnd(err, Const(2ul)); + } + } + + throw new PlatformNotSupportedException(); + } + + private static UnixExceptionHandler GenerateUnixSignalHandler(IntPtr signalStructPtr) + { + EmitterContext context = new EmitterContext(); + + // (int sig, SigInfo* sigInfo, void* ucontext) + Operand sigInfoPtr = context.LoadArgument(OperandType.I64, 1); + Operand ucontextPtr = context.LoadArgument(OperandType.I64, 2); + + Operand faultAddress = GenerateUnixFaultAddress(context, sigInfoPtr); + Operand writeFlag = GenerateUnixWriteFlag(context, ucontextPtr); + + Operand isWrite = context.ICompareNotEqual(writeFlag, Const(0L)); // Normalize to 0/1. 
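+ // Check whether the fault landed in a tracked range; when it did not, the
+ // code below falls through to the previously registered sigaction
+ // (1- or 3-argument form).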
+ + Operand isInRegion = EmitGenericRegionCheck(context, signalStructPtr, faultAddress, isWrite); + + Operand endLabel = Label(); + + context.BranchIfTrue(endLabel, isInRegion); + + Operand unixOldSigaction = context.Load(OperandType.I64, Const((ulong)signalStructPtr + UnixOldSigaction)); + Operand unixOldSigaction3Arg = context.Load(OperandType.I64, Const((ulong)signalStructPtr + UnixOldSigaction3Arg)); + Operand threeArgLabel = Label(); + + context.BranchIfTrue(threeArgLabel, unixOldSigaction3Arg); + + context.Call(unixOldSigaction, OperandType.None, context.LoadArgument(OperandType.I32, 0)); + context.Branch(endLabel); + + context.MarkLabel(threeArgLabel); + + context.Call(unixOldSigaction, + OperandType.None, + context.LoadArgument(OperandType.I32, 0), + sigInfoPtr, + context.LoadArgument(OperandType.I64, 2) + ); + + context.MarkLabel(endLabel); + + context.Return(); + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] { OperandType.I32, OperandType.I64, OperandType.I64 }; + + return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<UnixExceptionHandler>(); + } + + private static VectoredExceptionHandler GenerateWindowsSignalHandler(IntPtr signalStructPtr) + { + EmitterContext context = new EmitterContext(); + + // (ExceptionPointers* exceptionInfo) + Operand exceptionInfoPtr = context.LoadArgument(OperandType.I64, 0); + Operand exceptionRecordPtr = context.Load(OperandType.I64, exceptionInfoPtr); + + // First thing's first - this catches a number of exceptions, but we only want access violations. + Operand validExceptionLabel = Label(); + + Operand exceptionCode = context.Load(OperandType.I32, exceptionRecordPtr); + + context.BranchIf(validExceptionLabel, exceptionCode, Const(EXCEPTION_ACCESS_VIOLATION), Comparison.Equal); + + context.Return(Const(EXCEPTION_CONTINUE_SEARCH)); // Don't handle this one. + + context.MarkLabel(validExceptionLabel); + + // Next, read the address of the invalid access, and whether it is a write or not. + + Operand structAddressOffset = context.Load(OperandType.I32, Const((ulong)signalStructPtr + StructAddressOffset)); + Operand structWriteOffset = context.Load(OperandType.I32, Const((ulong)signalStructPtr + StructWriteOffset)); + + Operand faultAddress = context.Load(OperandType.I64, context.Add(exceptionRecordPtr, context.ZeroExtend32(OperandType.I64, structAddressOffset))); + Operand writeFlag = context.Load(OperandType.I64, context.Add(exceptionRecordPtr, context.ZeroExtend32(OperandType.I64, structWriteOffset))); + + Operand isWrite = context.ICompareNotEqual(writeFlag, Const(0L)); // Normalize to 0/1. + + Operand isInRegion = EmitGenericRegionCheck(context, signalStructPtr, faultAddress, isWrite); + + Operand endLabel = Label(); + + // If the region check result is false, then run the next vectored exception handler. + + context.BranchIfTrue(endLabel, isInRegion); + + context.Return(Const(EXCEPTION_CONTINUE_SEARCH)); + + context.MarkLabel(endLabel); + + // Otherwise, return to execution. + + context.Return(Const(EXCEPTION_CONTINUE_EXECUTION)); + + // Compile and return the function. 
+ + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] { OperandType.I64 }; + + return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<VectoredExceptionHandler>(); + } + } +} diff --git a/src/ARMeilleure/Signal/TestMethods.cs b/src/ARMeilleure/Signal/TestMethods.cs new file mode 100644 index 00000000..e2ecad24 --- /dev/null +++ b/src/ARMeilleure/Signal/TestMethods.cs @@ -0,0 +1,84 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using System; +using System.Runtime.InteropServices; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Signal +{ + public struct NativeWriteLoopState + { + public int Running; + public int Error; + } + + public static class TestMethods + { + public delegate bool DebugPartialUnmap(); + public delegate int DebugThreadLocalMapGetOrReserve(int threadId, int initialState); + public delegate void DebugNativeWriteLoop(IntPtr nativeWriteLoopPtr, IntPtr writePtr); + + public static DebugPartialUnmap GenerateDebugPartialUnmap() + { + EmitterContext context = new EmitterContext(); + + var result = WindowsPartialUnmapHandler.EmitRetryFromAccessViolation(context); + + context.Return(result); + + // Compile and return the function. + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] { OperandType.I64 }; + + return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugPartialUnmap>(); + } + + public static DebugThreadLocalMapGetOrReserve GenerateDebugThreadLocalMapGetOrReserve(IntPtr structPtr) + { + EmitterContext context = new EmitterContext(); + + var result = WindowsPartialUnmapHandler.EmitThreadLocalMapIntGetOrReserve(context, structPtr, context.LoadArgument(OperandType.I32, 0), context.LoadArgument(OperandType.I32, 1)); + + context.Return(result); + + // Compile and return the function. + + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] { OperandType.I64 }; + + return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugThreadLocalMapGetOrReserve>(); + } + + public static DebugNativeWriteLoop GenerateDebugNativeWriteLoop() + { + EmitterContext context = new EmitterContext(); + + // Loop a write to the target address until "running" is false. + + Operand structPtr = context.Copy(context.LoadArgument(OperandType.I64, 0)); + Operand writePtr = context.Copy(context.LoadArgument(OperandType.I64, 1)); + + Operand loopLabel = Label(); + context.MarkLabel(loopLabel); + + context.Store(writePtr, Const(12345)); + + Operand running = context.Load(OperandType.I32, structPtr); + + context.BranchIfTrue(loopLabel, running); + + context.Return(); + + // Compile and return the function. 
+ + ControlFlowGraph cfg = context.GetControlFlowGraph(); + + OperandType[] argTypes = new OperandType[] { OperandType.I64 }; + + return Compiler.Compile(cfg, argTypes, OperandType.None, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DebugNativeWriteLoop>(); + } + } +} diff --git a/src/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs b/src/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs new file mode 100644 index 00000000..22009240 --- /dev/null +++ b/src/ARMeilleure/Signal/UnixSignalHandlerRegistration.cs @@ -0,0 +1,83 @@ +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Signal +{ + static partial class UnixSignalHandlerRegistration + { + [StructLayout(LayoutKind.Sequential, Pack = 1)] + public unsafe struct SigSet + { + fixed long sa_mask[16]; + } + + [StructLayout(LayoutKind.Sequential, Pack = 1)] + public struct SigAction + { + public IntPtr sa_handler; + public SigSet sa_mask; + public int sa_flags; + public IntPtr sa_restorer; + } + + private const int SIGSEGV = 11; + private const int SIGBUS = 10; + private const int SA_SIGINFO = 0x00000004; + + [LibraryImport("libc", SetLastError = true)] + private static partial int sigaction(int signum, ref SigAction sigAction, out SigAction oldAction); + + [LibraryImport("libc", SetLastError = true)] + private static partial int sigaction(int signum, IntPtr sigAction, out SigAction oldAction); + + [LibraryImport("libc", SetLastError = true)] + private static partial int sigemptyset(ref SigSet set); + + public static SigAction GetSegfaultExceptionHandler() + { + int result = sigaction(SIGSEGV, IntPtr.Zero, out SigAction old); + + if (result != 0) + { + throw new InvalidOperationException($"Could not get SIGSEGV sigaction. Error: {result}"); + } + + return old; + } + + public static SigAction RegisterExceptionHandler(IntPtr action) + { + SigAction sig = new SigAction + { + sa_handler = action, + sa_flags = SA_SIGINFO + }; + + sigemptyset(ref sig.sa_mask); + + int result = sigaction(SIGSEGV, ref sig, out SigAction old); + + if (result != 0) + { + throw new InvalidOperationException($"Could not register SIGSEGV sigaction. Error: {result}"); + } + + if (OperatingSystem.IsMacOS()) + { + result = sigaction(SIGBUS, ref sig, out _); + + if (result != 0) + { + throw new InvalidOperationException($"Could not register SIGBUS sigaction. Error: {result}"); + } + } + + return old; + } + + public static bool RestoreExceptionHandler(SigAction oldAction) + { + return sigaction(SIGSEGV, ref oldAction, out SigAction _) == 0 && (!OperatingSystem.IsMacOS() || sigaction(SIGBUS, ref oldAction, out SigAction _) == 0); + } + } +} diff --git a/src/ARMeilleure/Signal/WindowsPartialUnmapHandler.cs b/src/ARMeilleure/Signal/WindowsPartialUnmapHandler.cs new file mode 100644 index 00000000..941e36e5 --- /dev/null +++ b/src/ARMeilleure/Signal/WindowsPartialUnmapHandler.cs @@ -0,0 +1,186 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Translation; +using Ryujinx.Common.Memory.PartialUnmaps; +using System; + +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Signal +{ + /// <summary> + /// Methods to handle signals caused by partial unmaps. See the structs for C# implementations of the methods. 
+ /// </summary> + internal static class WindowsPartialUnmapHandler + { + public static Operand EmitRetryFromAccessViolation(EmitterContext context) + { + IntPtr partialRemapStatePtr = PartialUnmapState.GlobalState; + IntPtr localCountsPtr = IntPtr.Add(partialRemapStatePtr, PartialUnmapState.LocalCountsOffset); + + // Get the lock first. + EmitNativeReaderLockAcquire(context, IntPtr.Add(partialRemapStatePtr, PartialUnmapState.PartialUnmapLockOffset)); + + IntPtr getCurrentThreadId = WindowsSignalHandlerRegistration.GetCurrentThreadIdFunc(); + Operand threadId = context.Call(Const((ulong)getCurrentThreadId), OperandType.I32); + Operand threadIndex = EmitThreadLocalMapIntGetOrReserve(context, localCountsPtr, threadId, Const(0)); + + Operand endLabel = Label(); + Operand retry = context.AllocateLocal(OperandType.I32); + Operand threadIndexValidLabel = Label(); + + context.BranchIfFalse(threadIndexValidLabel, context.ICompareEqual(threadIndex, Const(-1))); + + context.Copy(retry, Const(1)); // Always retry when thread local cannot be allocated. + + context.Branch(endLabel); + + context.MarkLabel(threadIndexValidLabel); + + Operand threadLocalPartialUnmapsPtr = EmitThreadLocalMapIntGetValuePtr(context, localCountsPtr, threadIndex); + Operand threadLocalPartialUnmaps = context.Load(OperandType.I32, threadLocalPartialUnmapsPtr); + Operand partialUnmapsCount = context.Load(OperandType.I32, Const((ulong)IntPtr.Add(partialRemapStatePtr, PartialUnmapState.PartialUnmapsCountOffset))); + + context.Copy(retry, context.ICompareNotEqual(threadLocalPartialUnmaps, partialUnmapsCount)); + + Operand noRetryLabel = Label(); + + context.BranchIfFalse(noRetryLabel, retry); + + // if (retry) { + + context.Store(threadLocalPartialUnmapsPtr, partialUnmapsCount); + + context.Branch(endLabel); + + context.MarkLabel(noRetryLabel); + + // } + + context.MarkLabel(endLabel); + + // Finally, release the lock and return the retry value. + EmitNativeReaderLockRelease(context, IntPtr.Add(partialRemapStatePtr, PartialUnmapState.PartialUnmapLockOffset)); + + return retry; + } + + public static Operand EmitThreadLocalMapIntGetOrReserve(EmitterContext context, IntPtr threadLocalMapPtr, Operand threadId, Operand initialState) + { + Operand idsPtr = Const((ulong)IntPtr.Add(threadLocalMapPtr, ThreadLocalMap<int>.ThreadIdsOffset)); + + Operand i = context.AllocateLocal(OperandType.I32); + + context.Copy(i, Const(0)); + + // (Loop 1) Check all slots for a matching Thread ID (while also trying to allocate) + + Operand endLabel = Label(); + + Operand loopLabel = Label(); + context.MarkLabel(loopLabel); + + Operand offset = context.Multiply(i, Const(sizeof(int))); + Operand idPtr = context.Add(idsPtr, context.SignExtend32(OperandType.I64, offset)); + + // Check that this slot has the thread ID. + Operand existingId = context.CompareAndSwap(idPtr, threadId, threadId); + + // If it was already the thread ID, then we just need to return i. + context.BranchIfTrue(endLabel, context.ICompareEqual(existingId, threadId)); + + context.Copy(i, context.Add(i, Const(1))); + + context.BranchIfTrue(loopLabel, context.ICompareLess(i, Const(ThreadLocalMap<int>.MapSize))); + + // (Loop 2) Try take a slot that is 0 with our Thread ID. + + context.Copy(i, Const(0)); // Reset i. + + Operand loop2Label = Label(); + context.MarkLabel(loop2Label); + + Operand offset2 = context.Multiply(i, Const(sizeof(int))); + Operand idPtr2 = context.Add(idsPtr, context.SignExtend32(OperandType.I64, offset2)); + + // Try and swap in the thread id on top of 0. 
+ Operand existingId2 = context.CompareAndSwap(idPtr2, Const(0), threadId); + + Operand idNot0Label = Label(); + + // If it was 0, then we need to initialize the struct entry and return i. + context.BranchIfFalse(idNot0Label, context.ICompareEqual(existingId2, Const(0))); + + Operand structsPtr = Const((ulong)IntPtr.Add(threadLocalMapPtr, ThreadLocalMap<int>.StructsOffset)); + Operand structPtr = context.Add(structsPtr, context.SignExtend32(OperandType.I64, offset2)); + context.Store(structPtr, initialState); + + context.Branch(endLabel); + + context.MarkLabel(idNot0Label); + + context.Copy(i, context.Add(i, Const(1))); + + context.BranchIfTrue(loop2Label, context.ICompareLess(i, Const(ThreadLocalMap<int>.MapSize))); + + context.Copy(i, Const(-1)); // Could not place the thread in the list. + + context.MarkLabel(endLabel); + + return context.Copy(i); + } + + private static Operand EmitThreadLocalMapIntGetValuePtr(EmitterContext context, IntPtr threadLocalMapPtr, Operand index) + { + Operand offset = context.Multiply(index, Const(sizeof(int))); + Operand structsPtr = Const((ulong)IntPtr.Add(threadLocalMapPtr, ThreadLocalMap<int>.StructsOffset)); + + return context.Add(structsPtr, context.SignExtend32(OperandType.I64, offset)); + } + + private static void EmitThreadLocalMapIntRelease(EmitterContext context, IntPtr threadLocalMapPtr, Operand threadId, Operand index) + { + Operand offset = context.Multiply(index, Const(sizeof(int))); + Operand idsPtr = Const((ulong)IntPtr.Add(threadLocalMapPtr, ThreadLocalMap<int>.ThreadIdsOffset)); + Operand idPtr = context.Add(idsPtr, context.SignExtend32(OperandType.I64, offset)); + + context.CompareAndSwap(idPtr, threadId, Const(0)); + } + + private static void EmitAtomicAddI32(EmitterContext context, Operand ptr, Operand additive) + { + Operand loop = Label(); + context.MarkLabel(loop); + + Operand initial = context.Load(OperandType.I32, ptr); + Operand newValue = context.Add(initial, additive); + + Operand replaced = context.CompareAndSwap(ptr, initial, newValue); + + context.BranchIfFalse(loop, context.ICompareEqual(initial, replaced)); + } + + private static void EmitNativeReaderLockAcquire(EmitterContext context, IntPtr nativeReaderLockPtr) + { + Operand writeLockPtr = Const((ulong)IntPtr.Add(nativeReaderLockPtr, NativeReaderWriterLock.WriteLockOffset)); + + // Spin until we can acquire the write lock. + Operand spinLabel = Label(); + context.MarkLabel(spinLabel); + + // Old value must be 0 to continue (we gained the write lock) + context.BranchIfTrue(spinLabel, context.CompareAndSwap(writeLockPtr, Const(0), Const(1))); + + // Increment reader count. + EmitAtomicAddI32(context, Const((ulong)IntPtr.Add(nativeReaderLockPtr, NativeReaderWriterLock.ReaderCountOffset)), Const(1)); + + // Release write lock. + context.CompareAndSwap(writeLockPtr, Const(1), Const(0)); + } + + private static void EmitNativeReaderLockRelease(EmitterContext context, IntPtr nativeReaderLockPtr) + { + // Decrement reader count. 
+ EmitAtomicAddI32(context, Const((ulong)IntPtr.Add(nativeReaderLockPtr, NativeReaderWriterLock.ReaderCountOffset)), Const(-1)); + } + } +} diff --git a/src/ARMeilleure/Signal/WindowsSignalHandlerRegistration.cs b/src/ARMeilleure/Signal/WindowsSignalHandlerRegistration.cs new file mode 100644 index 00000000..3219e015 --- /dev/null +++ b/src/ARMeilleure/Signal/WindowsSignalHandlerRegistration.cs @@ -0,0 +1,44 @@ +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Signal +{ + unsafe partial class WindowsSignalHandlerRegistration + { + [LibraryImport("kernel32.dll")] + private static partial IntPtr AddVectoredExceptionHandler(uint first, IntPtr handler); + + [LibraryImport("kernel32.dll")] + private static partial ulong RemoveVectoredExceptionHandler(IntPtr handle); + + [LibraryImport("kernel32.dll", SetLastError = true, EntryPoint = "LoadLibraryA")] + private static partial IntPtr LoadLibrary([MarshalAs(UnmanagedType.LPStr)] string lpFileName); + + [LibraryImport("kernel32.dll", SetLastError = true)] + private static partial IntPtr GetProcAddress(IntPtr hModule, [MarshalAs(UnmanagedType.LPStr)] string procName); + + private static IntPtr _getCurrentThreadIdPtr; + + public static IntPtr RegisterExceptionHandler(IntPtr action) + { + return AddVectoredExceptionHandler(1, action); + } + + public static bool RemoveExceptionHandler(IntPtr handle) + { + return RemoveVectoredExceptionHandler(handle) != 0; + } + + public static IntPtr GetCurrentThreadIdFunc() + { + if (_getCurrentThreadIdPtr == IntPtr.Zero) + { + IntPtr handle = LoadLibrary("kernel32.dll"); + + _getCurrentThreadIdPtr = GetProcAddress(handle, "GetCurrentThreadId"); + } + + return _getCurrentThreadIdPtr; + } + } +} diff --git a/src/ARMeilleure/State/Aarch32Mode.cs b/src/ARMeilleure/State/Aarch32Mode.cs new file mode 100644 index 00000000..395e288a --- /dev/null +++ b/src/ARMeilleure/State/Aarch32Mode.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.State +{ + enum Aarch32Mode + { + User = 0b10000, + Fiq = 0b10001, + Irq = 0b10010, + Supervisor = 0b10011, + Monitor = 0b10110, + Abort = 0b10111, + Hypervisor = 0b11010, + Undefined = 0b11011, + System = 0b11111 + } +}
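The handler above exposes two public entry points, `InitializeSignalHandler` and `AddTrackedRegion`. A hedged setup sketch follows; `Install` is hypothetical, and the native callback signature is an assumption inferred from the call that `EmitGenericRegionCheck` emits (`int action(ulong offset, ulong pageSize, int isWrite)`).

```csharp
using System;
using ARMeilleure.Signal;

static class SignalHandlerSetupSketch
{
    // Hypothetical setup, not part of this commit. `trackingActionPtr` is
    // assumed to point to native code matching the call emitted by
    // EmitGenericRegionCheck: int action(ulong offset, ulong pageSize, int isWrite).
    public static void Install(nuint rangeStart, nuint rangeEnd, IntPtr trackingActionPtr)
    {
        NativeSignalHandler.InitializeSignalHandler((ulong)Environment.SystemPageSize);

        // Only eight range slots exist (MaxTrackedRanges), so this can fail.
        if (!NativeSignalHandler.AddTrackedRegion(rangeStart, rangeEnd, trackingActionPtr))
        {
            throw new InvalidOperationException("All tracked range slots are in use.");
        }
    }
}
```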
\ No newline at end of file diff --git a/src/ARMeilleure/State/ExceptionCallback.cs b/src/ARMeilleure/State/ExceptionCallback.cs new file mode 100644 index 00000000..38d6eef7 --- /dev/null +++ b/src/ARMeilleure/State/ExceptionCallback.cs @@ -0,0 +1,5 @@ +namespace ARMeilleure.State +{ + public delegate void ExceptionCallbackNoArgs(ExecutionContext context); + public delegate void ExceptionCallback(ExecutionContext context, ulong address, int id); +}
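These delegates are the hook points an embedder hands to `ExecutionContext` (added next in this diff). A small illustrative sketch with hypothetical logging bodies:

```csharp
using System;
using ARMeilleure.State;

static class ExceptionCallbackSketch
{
    // Hypothetical callbacks, not part of this commit; the bodies are
    // placeholders an embedder would replace with real exception handling.
    public static readonly ExceptionCallbackNoArgs OnInterrupt =
        context => Console.WriteLine($"Interrupt at PC=0x{context.Pc:X16}");

    public static readonly ExceptionCallback OnSupervisorCall =
        (context, address, imm) => Console.WriteLine($"SVC #{imm} at 0x{address:X16}");
}
```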
\ No newline at end of file diff --git a/src/ARMeilleure/State/ExecutionContext.cs b/src/ARMeilleure/State/ExecutionContext.cs new file mode 100644 index 00000000..859fb3a5 --- /dev/null +++ b/src/ARMeilleure/State/ExecutionContext.cs @@ -0,0 +1,173 @@ +using ARMeilleure.Memory; +using System; + +namespace ARMeilleure.State +{ + public class ExecutionContext + { + private const int MinCountForCheck = 4000; + + private NativeContext _nativeContext; + + internal IntPtr NativeContextPtr => _nativeContext.BasePtr; + + private bool _interrupted; + + private readonly ICounter _counter; + + public ulong Pc => _nativeContext.GetPc(); + + public uint CtrEl0 => 0x8444c004; + public uint DczidEl0 => 0x00000004; + + public ulong CntfrqEl0 => _counter.Frequency; + public ulong CntpctEl0 => _counter.Counter; + + // CNTVCT_EL0 = CNTPCT_EL0 - CNTVOFF_EL2 + // Since EL2 isn't implemented, CNTVOFF_EL2 = 0 + public ulong CntvctEl0 => CntpctEl0; + + public long TpidrEl0 + { + get => _nativeContext.GetTpidrEl0(); + set => _nativeContext.SetTpidrEl0(value); + } + + public long TpidrroEl0 + { + get => _nativeContext.GetTpidrroEl0(); + set => _nativeContext.SetTpidrroEl0(value); + } + + public uint Pstate + { + get => _nativeContext.GetPstate(); + set => _nativeContext.SetPstate(value); + } + + public FPSR Fpsr + { + get => (FPSR)_nativeContext.GetFPState((uint)FPSR.Mask); + set => _nativeContext.SetFPState((uint)value, (uint)FPSR.Mask); + } + + public FPCR Fpcr + { + get => (FPCR)_nativeContext.GetFPState((uint)FPCR.Mask); + set => _nativeContext.SetFPState((uint)value, (uint)FPCR.Mask); + } + public FPCR StandardFpcrValue => (Fpcr & (FPCR.Ahp)) | FPCR.Dn | FPCR.Fz; + + public FPSCR Fpscr + { + get => (FPSCR)_nativeContext.GetFPState((uint)FPSCR.Mask); + set => _nativeContext.SetFPState((uint)value, (uint)FPSCR.Mask); + } + + public bool IsAarch32 { get; set; } + + internal ExecutionMode ExecutionMode + { + get + { + if (IsAarch32) + { + return GetPstateFlag(PState.TFlag) + ? 
ExecutionMode.Aarch32Thumb + : ExecutionMode.Aarch32Arm; + } + else + { + return ExecutionMode.Aarch64; + } + } + } + + public bool Running + { + get => _nativeContext.GetRunning(); + private set => _nativeContext.SetRunning(value); + } + + private readonly ExceptionCallbackNoArgs _interruptCallback; + private readonly ExceptionCallback _breakCallback; + private readonly ExceptionCallback _supervisorCallback; + private readonly ExceptionCallback _undefinedCallback; + + public ExecutionContext( + IJitMemoryAllocator allocator, + ICounter counter, + ExceptionCallbackNoArgs interruptCallback = null, + ExceptionCallback breakCallback = null, + ExceptionCallback supervisorCallback = null, + ExceptionCallback undefinedCallback = null) + { + _nativeContext = new NativeContext(allocator); + _counter = counter; + _interruptCallback = interruptCallback; + _breakCallback = breakCallback; + _supervisorCallback = supervisorCallback; + _undefinedCallback = undefinedCallback; + + Running = true; + + _nativeContext.SetCounter(MinCountForCheck); + } + + public ulong GetX(int index) => _nativeContext.GetX(index); + public void SetX(int index, ulong value) => _nativeContext.SetX(index, value); + + public V128 GetV(int index) => _nativeContext.GetV(index); + public void SetV(int index, V128 value) => _nativeContext.SetV(index, value); + + public bool GetPstateFlag(PState flag) => _nativeContext.GetPstateFlag(flag); + public void SetPstateFlag(PState flag, bool value) => _nativeContext.SetPstateFlag(flag, value); + + public bool GetFPstateFlag(FPState flag) => _nativeContext.GetFPStateFlag(flag); + public void SetFPstateFlag(FPState flag, bool value) => _nativeContext.SetFPStateFlag(flag, value); + + internal void CheckInterrupt() + { + if (_interrupted) + { + _interrupted = false; + + _interruptCallback?.Invoke(this); + } + + _nativeContext.SetCounter(MinCountForCheck); + } + + public void RequestInterrupt() + { + _interrupted = true; + } + + internal void OnBreak(ulong address, int imm) + { + _breakCallback?.Invoke(this, address, imm); + } + + internal void OnSupervisorCall(ulong address, int imm) + { + _supervisorCallback?.Invoke(this, address, imm); + } + + internal void OnUndefined(ulong address, int opCode) + { + _undefinedCallback?.Invoke(this, address, opCode); + } + + public void StopRunning() + { + Running = false; + + _nativeContext.SetCounter(0); + } + + public void Dispose() + { + _nativeContext.Dispose(); + } + } +}
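Tying the pieces together, a hypothetical sketch of constructing and driving an `ExecutionContext`; the `allocator` and `counter` arguments are assumed to be concrete implementations of `IJitMemoryAllocator` and `ICounter`:

```csharp
using System;
using ARMeilleure.Memory;
using ARMeilleure.State;

static class ExecutionContextSketch
{
    // Hypothetical usage, not part of this commit.
    public static void Run(IJitMemoryAllocator allocator, ICounter counter)
    {
        var context = new ExecutionContext(
            allocator,
            counter,
            supervisorCallback: (ctx, address, imm) => Console.WriteLine($"SVC #{imm} at 0x{address:X16}"));

        context.SetX(0, 42UL);                     // Seed an integer register.
        context.SetPstateFlag(PState.ZFlag, true); // Toggle a PSTATE flag.

        // ... run translated code against the context here ...

        context.StopRunning();
        context.Dispose();
    }
}
```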
\ No newline at end of file diff --git a/src/ARMeilleure/State/ExecutionMode.cs b/src/ARMeilleure/State/ExecutionMode.cs new file mode 100644 index 00000000..29154a25 --- /dev/null +++ b/src/ARMeilleure/State/ExecutionMode.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.State +{ + enum ExecutionMode : int + { + Aarch32Arm = 0, + Aarch32Thumb = 1, + Aarch64 = 2 + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/State/FPCR.cs b/src/ARMeilleure/State/FPCR.cs new file mode 100644 index 00000000..6f707de7 --- /dev/null +++ b/src/ARMeilleure/State/FPCR.cs @@ -0,0 +1,22 @@ +using System; + +namespace ARMeilleure.State +{ + [Flags] + public enum FPCR : uint + { + Ioe = 1u << 8, + Dze = 1u << 9, + Ofe = 1u << 10, + Ufe = 1u << 11, + Ixe = 1u << 12, + Ide = 1u << 15, + RMode0 = 1u << 22, + RMode1 = 1u << 23, + Fz = 1u << 24, + Dn = 1u << 25, + Ahp = 1u << 26, + + Mask = Ahp | Dn | Fz | RMode1 | RMode0 | Ide | Ixe | Ufe | Ofe | Dze | Ioe // 0x07C09F00u + } +} diff --git a/src/ARMeilleure/State/FPException.cs b/src/ARMeilleure/State/FPException.cs new file mode 100644 index 00000000..e24e07af --- /dev/null +++ b/src/ARMeilleure/State/FPException.cs @@ -0,0 +1,12 @@ +namespace ARMeilleure.State +{ + enum FPException + { + InvalidOp = 0, + DivideByZero = 1, + Overflow = 2, + Underflow = 3, + Inexact = 4, + InputDenorm = 7 + } +} diff --git a/src/ARMeilleure/State/FPRoundingMode.cs b/src/ARMeilleure/State/FPRoundingMode.cs new file mode 100644 index 00000000..8d757a15 --- /dev/null +++ b/src/ARMeilleure/State/FPRoundingMode.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.State +{ + public enum FPRoundingMode + { + ToNearest = 0, // With ties to even. + TowardsPlusInfinity = 1, + TowardsMinusInfinity = 2, + TowardsZero = 3, + ToNearestAway = 4 // With ties to away. + } +} diff --git a/src/ARMeilleure/State/FPSCR.cs b/src/ARMeilleure/State/FPSCR.cs new file mode 100644 index 00000000..d6d2fc26 --- /dev/null +++ b/src/ARMeilleure/State/FPSCR.cs @@ -0,0 +1,15 @@ +using System; + +namespace ARMeilleure.State +{ + [Flags] + public enum FPSCR : uint + { + V = 1u << 28, + C = 1u << 29, + Z = 1u << 30, + N = 1u << 31, + + Mask = N | Z | C | V | FPSR.Mask | FPCR.Mask // 0xFFC09F9Fu + } +} diff --git a/src/ARMeilleure/State/FPSR.cs b/src/ARMeilleure/State/FPSR.cs new file mode 100644 index 00000000..5e66d5ce --- /dev/null +++ b/src/ARMeilleure/State/FPSR.cs @@ -0,0 +1,18 @@ +using System; + +namespace ARMeilleure.State +{ + [Flags] + public enum FPSR : uint + { + Ioc = 1u << 0, + Dzc = 1u << 1, + Ofc = 1u << 2, + Ufc = 1u << 3, + Ixc = 1u << 4, + Idc = 1u << 7, + Qc = 1u << 27, + + Mask = Qc | Idc | Ixc | Ufc | Ofc | Dzc | Ioc // 0x0800009Fu + } +} diff --git a/src/ARMeilleure/State/FPState.cs b/src/ARMeilleure/State/FPState.cs new file mode 100644 index 00000000..fa6ab9d4 --- /dev/null +++ b/src/ARMeilleure/State/FPState.cs @@ -0,0 +1,31 @@ +namespace ARMeilleure.State +{ + public enum FPState + { + // FPSR Flags. + IocFlag = 0, + DzcFlag = 1, + OfcFlag = 2, + UfcFlag = 3, + IxcFlag = 4, + IdcFlag = 7, + QcFlag = 27, + VFlag = 28, + CFlag = 29, + ZFlag = 30, + NFlag = 31, + + // FPCR Flags. + IoeFlag = 8, + DzeFlag = 9, + OfeFlag = 10, + UfeFlag = 11, + IxeFlag = 12, + IdeFlag = 15, + RMode0Flag = 22, + RMode1Flag = 23, + FzFlag = 24, + DnFlag = 25, + AhpFlag = 26 + } +} diff --git a/src/ARMeilleure/State/FPType.cs b/src/ARMeilleure/State/FPType.cs new file mode 100644 index 00000000..84e0db8d --- /dev/null +++ b/src/ARMeilleure/State/FPType.cs @@ -0,0 +1,11 @@ +namespace ARMeilleure.State +{ + enum FPType + { + Nonzero, + Zero, + Infinity, + QNaN, + SNaN + } +} diff --git a/src/ARMeilleure/State/ICounter.cs b/src/ARMeilleure/State/ICounter.cs new file mode 100644 index 00000000..93e721ea --- /dev/null +++ b/src/ARMeilleure/State/ICounter.cs @@ -0,0 +1,18 @@ +namespace ARMeilleure.State +{ + /// <summary> + /// CPU Counter interface. 
+ /// </summary> + public interface ICounter + { + /// <summary> + /// Counter frequency in Hertz. + /// </summary> + ulong Frequency { get; } + + /// <summary> + /// Current counter value. + /// </summary> + ulong Counter { get; } + } +}
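For illustration, a minimal `ICounter` could be backed by the host's high-resolution timer. `StopwatchCounter` is a hypothetical sketch, not something this commit ships:

```csharp
using System.Diagnostics;
using ARMeilleure.State;

// Hypothetical ICounter backed by the host's high-resolution timer.
class StopwatchCounter : ICounter
{
    private readonly Stopwatch _stopwatch = Stopwatch.StartNew();

    public ulong Frequency => (ulong)Stopwatch.Frequency; // Ticks per second.

    public ulong Counter => (ulong)_stopwatch.ElapsedTicks;
}
```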
\ No newline at end of file diff --git a/src/ARMeilleure/State/NativeContext.cs b/src/ARMeilleure/State/NativeContext.cs new file mode 100644 index 00000000..3189bdd8 --- /dev/null +++ b/src/ARMeilleure/State/NativeContext.cs @@ -0,0 +1,269 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Memory; +using System; +using System.Runtime.CompilerServices; + +namespace ARMeilleure.State +{ + class NativeContext : IDisposable + { + private unsafe struct NativeCtxStorage + { + public fixed ulong X[RegisterConsts.IntRegsCount]; + public fixed ulong V[RegisterConsts.VecRegsCount * 2]; + public fixed uint Flags[RegisterConsts.FlagsCount]; + public fixed uint FpFlags[RegisterConsts.FpFlagsCount]; + public long TpidrEl0; + public long TpidrroEl0; + public int Counter; + public ulong DispatchAddress; + public ulong ExclusiveAddress; + public ulong ExclusiveValueLow; + public ulong ExclusiveValueHigh; + public int Running; + } + + private static NativeCtxStorage _dummyStorage = new NativeCtxStorage(); + + private readonly IJitMemoryBlock _block; + + public IntPtr BasePtr => _block.Pointer; + + public NativeContext(IJitMemoryAllocator allocator) + { + _block = allocator.Allocate((ulong)Unsafe.SizeOf<NativeCtxStorage>()); + + GetStorage().ExclusiveAddress = ulong.MaxValue; + } + + public ulong GetPc() + { + // TODO: More precise tracking of PC value. + return GetStorage().DispatchAddress; + } + + public unsafe ulong GetX(int index) + { + if ((uint)index >= RegisterConsts.IntRegsCount) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + return GetStorage().X[index]; + } + + public unsafe void SetX(int index, ulong value) + { + if ((uint)index >= RegisterConsts.IntRegsCount) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + GetStorage().X[index] = value; + } + + public unsafe V128 GetV(int index) + { + if ((uint)index >= RegisterConsts.VecRegsCount) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + return new V128(GetStorage().V[index * 2 + 0], GetStorage().V[index * 2 + 1]); + } + + public unsafe void SetV(int index, V128 value) + { + if ((uint)index >= RegisterConsts.VecRegsCount) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + GetStorage().V[index * 2 + 0] = value.Extract<ulong>(0); + GetStorage().V[index * 2 + 1] = value.Extract<ulong>(1); + } + + public unsafe bool GetPstateFlag(PState flag) + { + if ((uint)flag >= RegisterConsts.FlagsCount) + { + throw new ArgumentException($"Invalid flag \"{flag}\" specified."); + } + + return GetStorage().Flags[(int)flag] != 0; + } + + public unsafe void SetPstateFlag(PState flag, bool value) + { + if ((uint)flag >= RegisterConsts.FlagsCount) + { + throw new ArgumentException($"Invalid flag \"{flag}\" specified."); + } + + GetStorage().Flags[(int)flag] = value ? 1u : 0u; + } + + public unsafe uint GetPstate() + { + uint value = 0; + for (int flag = 0; flag < RegisterConsts.FlagsCount; flag++) + { + value |= GetStorage().Flags[flag] != 0 ? 1u << flag : 0u; + } + return value; + } + + public unsafe void SetPstate(uint value) + { + for (int flag = 0; flag < RegisterConsts.FlagsCount; flag++) + { + uint bit = 1u << flag; + GetStorage().Flags[flag] = (value & bit) == bit ? 
1u : 0u; + } + } + + public unsafe bool GetFPStateFlag(FPState flag) + { + if ((uint)flag >= RegisterConsts.FpFlagsCount) + { + throw new ArgumentException($"Invalid flag \"{flag}\" specified."); + } + + return GetStorage().FpFlags[(int)flag] != 0; + } + + public unsafe void SetFPStateFlag(FPState flag, bool value) + { + if ((uint)flag >= RegisterConsts.FpFlagsCount) + { + throw new ArgumentException($"Invalid flag \"{flag}\" specified."); + } + + GetStorage().FpFlags[(int)flag] = value ? 1u : 0u; + } + + public unsafe uint GetFPState(uint mask = uint.MaxValue) + { + uint value = 0; + for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++) + { + uint bit = 1u << flag; + + if ((mask & bit) == bit) + { + value |= GetStorage().FpFlags[flag] != 0 ? bit : 0u; + } + } + return value; + } + + public unsafe void SetFPState(uint value, uint mask = uint.MaxValue) + { + for (int flag = 0; flag < RegisterConsts.FpFlagsCount; flag++) + { + uint bit = 1u << flag; + + if ((mask & bit) == bit) + { + GetStorage().FpFlags[flag] = (value & bit) == bit ? 1u : 0u; + } + } + } + + public long GetTpidrEl0() => GetStorage().TpidrEl0; + public void SetTpidrEl0(long value) => GetStorage().TpidrEl0 = value; + + public long GetTpidrroEl0() => GetStorage().TpidrroEl0; + public void SetTpidrroEl0(long value) => GetStorage().TpidrroEl0 = value; + + public int GetCounter() => GetStorage().Counter; + public void SetCounter(int value) => GetStorage().Counter = value; + + public bool GetRunning() => GetStorage().Running != 0; + public void SetRunning(bool value) => GetStorage().Running = value ? 1 : 0; + + public unsafe static int GetRegisterOffset(Register reg) + { + if (reg.Type == RegisterType.Integer) + { + if ((uint)reg.Index >= RegisterConsts.IntRegsCount) + { + throw new ArgumentException("Invalid register."); + } + + return StorageOffset(ref _dummyStorage, ref _dummyStorage.X[reg.Index]); + } + else if (reg.Type == RegisterType.Vector) + { + if ((uint)reg.Index >= RegisterConsts.VecRegsCount) + { + throw new ArgumentException("Invalid register."); + } + + return StorageOffset(ref _dummyStorage, ref _dummyStorage.V[reg.Index * 2]); + } + else if (reg.Type == RegisterType.Flag) + { + if ((uint)reg.Index >= RegisterConsts.FlagsCount) + { + throw new ArgumentException("Invalid register."); + } + + return StorageOffset(ref _dummyStorage, ref _dummyStorage.Flags[reg.Index]); + } + else /* if (reg.Type == RegisterType.FpFlag) */ + { + if ((uint)reg.Index >= RegisterConsts.FpFlagsCount) + { + throw new ArgumentException("Invalid register."); + } + + return StorageOffset(ref _dummyStorage, ref _dummyStorage.FpFlags[reg.Index]); + } + } + + public static int GetTpidrEl0Offset() + { + return StorageOffset(ref _dummyStorage, ref _dummyStorage.TpidrEl0); + } + + public static int GetTpidrroEl0Offset() + { + return StorageOffset(ref _dummyStorage, ref _dummyStorage.TpidrroEl0); + } + + public static int GetCounterOffset() + { + return StorageOffset(ref _dummyStorage, ref _dummyStorage.Counter); + } + + public static int GetDispatchAddressOffset() + { + return StorageOffset(ref _dummyStorage, ref _dummyStorage.DispatchAddress); + } + + public static int GetExclusiveAddressOffset() + { + return StorageOffset(ref _dummyStorage, ref _dummyStorage.ExclusiveAddress); + } + + public static int GetExclusiveValueOffset() + { + return StorageOffset(ref _dummyStorage, ref _dummyStorage.ExclusiveValueLow); + } + + public static int GetRunningOffset() + { + return StorageOffset(ref _dummyStorage, ref _dummyStorage.Running); + } + + 
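+ // Byte offset of `target` from the start of `storage`, computed with ref
+ // arithmetic against the static dummy instance; avoids reflection and
+ // Marshal.OffsetOf.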
private static int StorageOffset<T>(ref NativeCtxStorage storage, ref T target) + { + return (int)Unsafe.ByteOffset(ref Unsafe.As<NativeCtxStorage, T>(ref storage), ref target); + } + + private unsafe ref NativeCtxStorage GetStorage() => ref Unsafe.AsRef<NativeCtxStorage>((void*)_block.Pointer); + + public void Dispose() => _block.Dispose(); + } +}
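`StorageOffset` computes field offsets with `Unsafe.ByteOffset` against a dummy storage instance, avoiding reflection. A standalone sketch of the same trick on an invented struct:

```csharp
using System.Runtime.CompilerServices;

struct ExampleStorage
{
    public ulong A;
    public ulong B; // Expected offset: 8.
}

static class OffsetSketch
{
    private static ExampleStorage _dummy;

    public static int OffsetOfB()
    {
        // Byte distance from the start of the struct to the target field.
        return (int)Unsafe.ByteOffset(ref Unsafe.As<ExampleStorage, ulong>(ref _dummy), ref _dummy.B);
    }
}
```

For this layout, `OffsetSketch.OffsetOfB()` returns 8.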
\ No newline at end of file diff --git a/src/ARMeilleure/State/PState.cs b/src/ARMeilleure/State/PState.cs new file mode 100644 index 00000000..9a80bc57 --- /dev/null +++ b/src/ARMeilleure/State/PState.cs @@ -0,0 +1,17 @@ +namespace ARMeilleure.State +{ + public enum PState + { + TFlag = 5, + EFlag = 9, + GE0Flag = 16, + GE1Flag = 17, + GE2Flag = 18, + GE3Flag = 19, + QFlag = 27, + VFlag = 28, + CFlag = 29, + ZFlag = 30, + NFlag = 31 + } +} diff --git a/src/ARMeilleure/State/RegisterAlias.cs b/src/ARMeilleure/State/RegisterAlias.cs new file mode 100644 index 00000000..7ebfa275 --- /dev/null +++ b/src/ARMeilleure/State/RegisterAlias.cs @@ -0,0 +1,42 @@ +namespace ARMeilleure.State +{ + static class RegisterAlias + { + public const int R8Usr = 8; + public const int R9Usr = 9; + public const int R10Usr = 10; + public const int R11Usr = 11; + public const int R12Usr = 12; + public const int SpUsr = 13; + public const int LrUsr = 14; + + public const int SpHyp = 15; + + public const int LrIrq = 16; + public const int SpIrq = 17; + + public const int LrSvc = 18; + public const int SpSvc = 19; + + public const int LrAbt = 20; + public const int SpAbt = 21; + + public const int LrUnd = 22; + public const int SpUnd = 23; + + public const int R8Fiq = 24; + public const int R9Fiq = 25; + public const int R10Fiq = 26; + public const int R11Fiq = 27; + public const int R12Fiq = 28; + public const int SpFiq = 29; + public const int LrFiq = 30; + + public const int Aarch32Sp = 13; + public const int Aarch32Lr = 14; + public const int Aarch32Pc = 15; + + public const int Lr = 30; + public const int Zr = 31; + } +}
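These aliases flatten the AArch32 banked registers into a single index space. A hypothetical helper (assumed to live inside the ARMeilleure assembly, since both types are internal) showing how a mode from `Aarch32Mode` might select its banked stack pointer slot:

```csharp
using ARMeilleure.State;

static class BankedRegisterSketch
{
    // Hypothetical helper, not part of this commit: maps an AArch32 mode to
    // the flattened index of its banked stack pointer.
    public static int GetSpIndex(Aarch32Mode mode)
    {
        return mode switch
        {
            Aarch32Mode.Fiq        => RegisterAlias.SpFiq,
            Aarch32Mode.Irq        => RegisterAlias.SpIrq,
            Aarch32Mode.Supervisor => RegisterAlias.SpSvc,
            Aarch32Mode.Abort      => RegisterAlias.SpAbt,
            Aarch32Mode.Hypervisor => RegisterAlias.SpHyp,
            Aarch32Mode.Undefined  => RegisterAlias.SpUnd,
            // User and System share the user bank; Monitor is not modelled
            // in RegisterAlias.
            _                      => RegisterAlias.SpUsr,
        };
    }
}
```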
\ No newline at end of file diff --git a/src/ARMeilleure/State/RegisterConsts.cs b/src/ARMeilleure/State/RegisterConsts.cs new file mode 100644 index 00000000..d6294080 --- /dev/null +++ b/src/ARMeilleure/State/RegisterConsts.cs @@ -0,0 +1,15 @@ +namespace ARMeilleure.State +{ + static class RegisterConsts + { + public const int IntRegsCount = 32; + public const int VecRegsCount = 32; + public const int FlagsCount = 32; + public const int FpFlagsCount = 32; + public const int IntAndVecRegsCount = IntRegsCount + VecRegsCount; + public const int FpFlagsOffset = IntRegsCount + VecRegsCount + FlagsCount; + public const int TotalCount = IntRegsCount + VecRegsCount + FlagsCount + FpFlagsCount; + + public const int ZeroIndex = 31; + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/State/V128.cs b/src/ARMeilleure/State/V128.cs new file mode 100644 index 00000000..3fa9f9a9 --- /dev/null +++ b/src/ARMeilleure/State/V128.cs @@ -0,0 +1,312 @@ +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace ARMeilleure.State +{ + /// <summary> + /// Represents a 128-bit vector. + /// </summary> + [StructLayout(LayoutKind.Sequential, Size = 16)] + public struct V128 : IEquatable<V128> + { + // _e0 & _e1 could be marked as readonly; however, they are not because we modify them through the Unsafe + // APIs. This also means that one should be careful when changing the layout of this struct. + + private ulong _e0; + private ulong _e1; + + /// <summary> + /// Gets a new <see cref="V128"/> with all bits set to zero. + /// </summary> + public static V128 Zero => new V128(0, 0); + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="double"/> value + /// as a scalar. + /// </summary> + /// <param name="value">Scalar value</param> + public V128(double value) : this(value, 0) { } + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="double"/> elements. + /// </summary> + /// <param name="e0">Element 0</param> + /// <param name="e1">Element 1</param> + public V128(double e0, double e1) + { + _e0 = (ulong)BitConverter.DoubleToInt64Bits(e0); + _e1 = (ulong)BitConverter.DoubleToInt64Bits(e1); + } + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="float"/> value as a + /// scalar. + /// </summary> + /// <param name="value">Scalar value</param> + public V128(float value) : this(value, 0, 0, 0) { } + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="float"/> elements. + /// </summary> + /// <param name="e0">Element 0</param> + /// <param name="e1">Element 1</param> + /// <param name="e2">Element 2</param> + /// <param name="e3">Element 3</param> + public V128(float e0, float e1, float e2, float e3) + { + _e0 = (ulong)(uint)BitConverter.SingleToInt32Bits(e0) << 0; + _e0 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e1) << 32; + _e1 = (ulong)(uint)BitConverter.SingleToInt32Bits(e2) << 0; + _e1 |= (ulong)(uint)BitConverter.SingleToInt32Bits(e3) << 32; + } + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="long"/> + /// elements. + /// </summary> + /// <param name="e0">Element 0</param> + /// <param name="e1">Element 1</param> + public V128(long e0, long e1) : this((ulong)e0, (ulong)e1) { } + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="ulong"/> elements. + /// </summary> + /// <param name="e0">Element 0</param> + /// <param name="e1">Element 1</param> + public V128(ulong e0, ulong e1) + { + _e0 = e0; + _e1 = e1; + } + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="int"/> elements. 
+ /// </summary> + /// <param name="e0">Element 0</param> + /// <param name="e1">Element 1</param> + /// <param name="e2">Element 2</param> + /// <param name="e3">Element 3</param> + public V128(int e0, int e1, int e2, int e3) : this((uint)e0, (uint)e1, (uint)e2, (uint)e3) { } + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct with the specified <see cref="uint"/> elements. + /// </summary> + /// <param name="e0">Element 0</param> + /// <param name="e1">Element 1</param> + /// <param name="e2">Element 2</param> + /// <param name="e3">Element 3</param> + public V128(uint e0, uint e1, uint e2, uint e3) + { + _e0 = (ulong)e0 << 0; + _e0 |= (ulong)e1 << 32; + _e1 = (ulong)e2 << 0; + _e1 |= (ulong)e3 << 32; + } + + /// <summary> + /// Initializes a new instance of the <see cref="V128"/> struct from the specified <see cref="byte"/> array. + /// </summary> + /// <param name="data"><see cref="byte"/> array to use</param> + public V128(byte[] data) + { + _e0 = (ulong)BitConverter.ToInt64(data, 0); + _e1 = (ulong)BitConverter.ToInt64(data, 8); + } + + /// <summary> + /// Returns the value of the <see cref="V128"/> as a <typeparamref name="T"/> scalar. + /// </summary> + /// <typeparam name="T">Type of scalar</typeparam> + /// <returns>Value of the <see cref="V128"/> as a <typeparamref name="T"/> scalar</returns> + /// <exception cref="ArgumentOutOfRangeException">Size of <typeparamref name="T"/> is larger than 16 bytes</exception> + public T As<T>() where T : unmanaged + { + return Extract<T>(0); + } + + /// <summary> + /// Extracts the element at the specified index as a <typeparamref name="T"/> from the <see cref="V128"/>. + /// </summary> + /// <typeparam name="T">Element type</typeparam> + /// <param name="index">Index of element</param> + /// <returns>Element at the specified index as a <typeparamref name="T"/> from the <see cref="V128"/></returns> + /// <exception cref="ArgumentOutOfRangeException"> + /// <paramref name="index"/> is out of bounds or the size of <typeparamref name="T"/> is larger than 16 bytes + /// </exception> + public T Extract<T>(int index) where T : unmanaged + { + if ((uint)index >= GetElementCount<T>()) + ThrowIndexOutOfRange(); + + // Performs: + // return *((T*)this + index); + return Unsafe.Add(ref Unsafe.As<V128, T>(ref this), index); + } + + /// <summary> + /// Inserts the specified value into the element at the specified index in the <see cref="V128"/>. + /// </summary> + /// <typeparam name="T">Element type</typeparam> + /// <param name="index">Index of element</param> + /// <param name="value">Value to insert</param> + /// <exception cref="ArgumentOutOfRangeException"> + /// <paramref name="index"/> is out of bounds or the size of <typeparamref name="T"/> is larger than 16 bytes + /// </exception> + public void Insert<T>(int index, T value) where T : unmanaged + { + if ((uint)index >= GetElementCount<T>()) + ThrowIndexOutOfRange(); + + // Performs: + // *((T*)this + index) = value; + Unsafe.Add(ref Unsafe.As<V128, T>(ref this), index) = value; + } + + /// <summary> + /// Returns a new <see cref="byte"/> array which represents the <see cref="V128"/>. 
+ /// </summary> + /// <returns>A new <see cref="byte"/> array which represents the <see cref="V128"/></returns> + public byte[] ToArray() + { + byte[] data = new byte[16]; + Span<byte> span = data; + + BitConverter.TryWriteBytes(span, _e0); + BitConverter.TryWriteBytes(span.Slice(8), _e1); + + return data; + } + + /// <summary> + /// Performs a bitwise logical left shift on the specified <see cref="V128"/> by the specified shift count. + /// </summary> + /// <param name="x"><see cref="V128"/> instance</param> + /// <param name="shift">Number of shifts</param> + /// <returns>Result of left shift</returns> + /// <remarks> + /// This supports shift counts up to 63; anything above may result in unexpected behaviour. + /// </remarks> + public static V128 operator <<(V128 x, int shift) + { + if (shift == 0) + { + return new V128(x._e0, x._e1); + } + + ulong shiftOut = x._e0 >> (64 - shift); + + return new V128(x._e0 << shift, (x._e1 << shift) | shiftOut); + } + + /// <summary> + /// Performs a bitwise logical right shift on the specified <see cref="V128"/> by the specified shift count. + /// </summary> + /// <param name="x"><see cref="V128"/> instance</param> + /// <param name="shift">Number of shifts</param> + /// <returns>Result of right shift</returns> + /// <remarks> + /// This supports shift counts up to 63; anything above may result in unexpected behaviour. + /// </remarks> + public static V128 operator >>(V128 x, int shift) + { + if (shift == 0) + { + return new V128(x._e0, x._e1); + } + + ulong shiftOut = x._e1 & ((1UL << shift) - 1); + + return new V128((x._e0 >> shift) | (shiftOut << (64 - shift)), x._e1 >> shift); + } + + /// <summary> + /// Performs a bitwise not on the specified <see cref="V128"/>. + /// </summary> + /// <param name="x">Target <see cref="V128"/></param> + /// <returns>Result of not operation</returns> + public static V128 operator ~(V128 x) => new V128(~x._e0, ~x._e1); + + /// <summary> + /// Performs a bitwise and on the specified <see cref="V128"/> instances. + /// </summary> + /// <param name="x">First instance</param> + /// <param name="y">Second instance</param> + /// <returns>Result of and operation</returns> + public static V128 operator &(V128 x, V128 y) => new V128(x._e0 & y._e0, x._e1 & y._e1); + + /// <summary> + /// Performs a bitwise or on the specified <see cref="V128"/> instances. + /// </summary> + /// <param name="x">First instance</param> + /// <param name="y">Second instance</param> + /// <returns>Result of or operation</returns> + public static V128 operator |(V128 x, V128 y) => new V128(x._e0 | y._e0, x._e1 | y._e1); + + /// <summary> + /// Performs a bitwise exclusive or on the specified <see cref="V128"/> instances. + /// </summary> + /// <param name="x">First instance</param> + /// <param name="y">Second instance</param> + /// <returns>Result of exclusive or operation</returns> + public static V128 operator ^(V128 x, V128 y) => new V128(x._e0 ^ y._e0, x._e1 ^ y._e1); + + /// <summary> + /// Determines if the specified <see cref="V128"/> instances are equal. + /// </summary> + /// <param name="x">First instance</param> + /// <param name="y">Second instance</param> + /// <returns>true if equal; otherwise false</returns> + public static bool operator ==(V128 x, V128 y) => x.Equals(y); + + /// <summary> + /// Determines if the specified <see cref="V128"/> instances are not equal. 
+ /// </summary> + /// <param name="x">First instance</param> + /// <param name="y">Second instance</param> + /// <returns>true if not equal; otherwise false</returns> + public static bool operator !=(V128 x, V128 y) => !x.Equals(y); + + /// <summary> + /// Determines if the specified <see cref="V128"/> is equal to this <see cref="V128"/> instance. + /// </summary> + /// <param name="other">Other <see cref="V128"/> instance</param> + /// <returns>true if equal; otherwise false</returns> + public bool Equals(V128 other) + { + return other._e0 == _e0 && other._e1 == _e1; + } + + /// <summary> + /// Determines if the specified <see cref="object"/> is equal to this <see cref="V128"/> instance. + /// </summary> + /// <param name="obj">Other <see cref="object"/> instance</param> + /// <returns>true if equal; otherwise false</returns> + public override bool Equals(object obj) + { + return obj is V128 vector && Equals(vector); + } + + /// <inheritdoc/> + public override int GetHashCode() + { + return HashCode.Combine(_e0, _e1); + } + + /// <inheritdoc/> + public override string ToString() + { + return $"0x{_e1:X16}{_e0:X16}"; + } + + private uint GetElementCount<T>() where T : unmanaged + { + return (uint)(Unsafe.SizeOf<V128>() / Unsafe.SizeOf<T>()); + } + + private static void ThrowIndexOutOfRange() + { + throw new ArgumentOutOfRangeException("index"); + } + } +}
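Because V128's lanes are reachable only through the generic Extract/Insert pair, and the shift operators carry bits across the two 64-bit halves, a short usage sketch may help (the values are arbitrary; V128 is the public type defined above):

using ARMeilleure.State;
using System;

class V128Demo
{
    static void Main()
    {
        V128 v = new V128(0x0123456789ABCDEFul, 0ul);

        // The same 16 bytes can be read as 4 uints, 2 ulongs, 16 bytes, etc.
        uint lane1 = v.Extract<uint>(1);   // 0x01234567
        Console.WriteLine($"0x{lane1:X8}");

        v.Insert<byte>(15, 0xFF);          // highest byte of the upper half

        // A whole-vector shift: bits leaving the low half enter the high half.
        V128 shifted = v << 8;
        Console.WriteLine(shifted);        // 128-bit hex, high half printed first
    }
}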
\ No newline at end of file diff --git a/src/ARMeilleure/Statistics.cs b/src/ARMeilleure/Statistics.cs new file mode 100644 index 00000000..fbc64708 --- /dev/null +++ b/src/ARMeilleure/Statistics.cs @@ -0,0 +1,94 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; + +namespace ARMeilleure +{ + public static class Statistics + { + private const int ReportMaxFunctions = 100; + +#pragma warning disable CS0169 + [ThreadStatic] + private static Stopwatch _executionTimer; +#pragma warning restore CS0169 + + private static ConcurrentDictionary<ulong, long> _ticksPerFunction; + + static Statistics() + { + _ticksPerFunction = new ConcurrentDictionary<ulong, long>(); + } + + public static void InitializeTimer() + { +#if M_PROFILE + if (_executionTimer == null) + { + _executionTimer = new Stopwatch(); + } +#endif + } + + internal static void StartTimer() + { +#if M_PROFILE + _executionTimer.Restart(); +#endif + } + + internal static void StopTimer(ulong funcAddr) + { +#if M_PROFILE + _executionTimer.Stop(); + + long ticks = _executionTimer.ElapsedTicks; + + _ticksPerFunction.AddOrUpdate(funcAddr, ticks, (key, oldTicks) => oldTicks + ticks); +#endif + } + + internal static void ResumeTimer() + { +#if M_PROFILE + _executionTimer.Start(); +#endif + } + + internal static void PauseTimer() + { +#if M_PROFILE + _executionTimer.Stop(); +#endif + } + + public static string GetReport() + { + int count = 0; + + StringBuilder sb = new StringBuilder(); + + sb.AppendLine(" Function address | Time"); + sb.AppendLine("--------------------------"); + + KeyValuePair<ulong, long>[] funcTable = _ticksPerFunction.ToArray(); + + foreach (KeyValuePair<ulong, long> kv in funcTable.OrderByDescending(x => x.Value)) + { + long timeInMs = (kv.Value * 1000) / Stopwatch.Frequency; + + sb.AppendLine($" 0x{kv.Key:X16} | {timeInMs} ms"); + + if (count++ >= ReportMaxFunctions) + { + break; + } + } + + return sb.ToString(); + } + } +}
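Note that all of the timing above compiles away unless M_PROFILE is defined; only the report plumbing is always present. The accumulation relies on ConcurrentDictionary.AddOrUpdate so translator threads can fold tick counts without a lock, and the report converts Stopwatch ticks to milliseconds with ticks * 1000 / Stopwatch.Frequency. A condensed sketch of that accumulate-and-report pattern:

using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;

class TickAccumulator
{
    private readonly ConcurrentDictionary<ulong, long> _ticks = new();

    public void Add(ulong funcAddr, long elapsedTicks)
    {
        // Inserts the first sample, or folds into the running total (retrying on contention).
        _ticks.AddOrUpdate(funcAddr, elapsedTicks, (_, old) => old + elapsedTicks);
    }

    public long MillisecondsFor(ulong funcAddr)
    {
        long ticks = _ticks.GetValueOrDefault(funcAddr);
        return ticks * 1000 / Stopwatch.Frequency; // Frequency is ticks per second.
    }
}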
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/ArmEmitterContext.cs b/src/ARMeilleure/Translation/ArmEmitterContext.cs new file mode 100644 index 00000000..565d2aad --- /dev/null +++ b/src/ARMeilleure/Translation/ArmEmitterContext.cs @@ -0,0 +1,282 @@ +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.Common; +using ARMeilleure.Decoders; +using ARMeilleure.Diagnostics; +using ARMeilleure.Instructions; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.Memory; +using ARMeilleure.State; +using System; +using System.Collections.Generic; +using System.Reflection; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Translation +{ + class ArmEmitterContext : EmitterContext + { + private readonly Dictionary<ulong, Operand> _labels; + + private OpCode _optOpLastCompare; + private OpCode _optOpLastFlagSet; + + private Operand _optCmpTempN; + private Operand _optCmpTempM; + + private Block _currBlock; + + public Block CurrBlock + { + get + { + return _currBlock; + } + set + { + _currBlock = value; + + ResetBlockState(); + } + } + + private bool _pendingQcFlagSync; + + public OpCode CurrOp { get; set; } + + public IMemoryManager Memory { get; } + + public EntryTable<uint> CountTable { get; } + public AddressTable<ulong> FunctionTable { get; } + public TranslatorStubs Stubs { get; } + + public ulong EntryAddress { get; } + public bool HighCq { get; } + public bool HasPtc { get; } + public Aarch32Mode Mode { get; } + + private int _ifThenBlockStateIndex = 0; + private Condition[] _ifThenBlockState = { }; + public bool IsInIfThenBlock => _ifThenBlockStateIndex < _ifThenBlockState.Length; + public Condition CurrentIfThenBlockCond => _ifThenBlockState[_ifThenBlockStateIndex]; + + public ArmEmitterContext( + IMemoryManager memory, + EntryTable<uint> countTable, + AddressTable<ulong> funcTable, + TranslatorStubs stubs, + ulong entryAddress, + bool highCq, + bool hasPtc, + Aarch32Mode mode) + { + Memory = memory; + CountTable = countTable; + FunctionTable = funcTable; + Stubs = stubs; + EntryAddress = entryAddress; + HighCq = highCq; + HasPtc = hasPtc; + Mode = mode; + + _labels = new Dictionary<ulong, Operand>(); + } + + public override Operand Call(MethodInfo info, params Operand[] callArgs) + { + SyncQcFlag(); + + if (!HasPtc) + { + return base.Call(info, callArgs); + } + else + { + int index = Delegates.GetDelegateIndex(info); + IntPtr funcPtr = Delegates.GetDelegateFuncPtrByIndex(index); + + OperandType returnType = GetOperandType(info.ReturnType); + + Symbol symbol = new Symbol(SymbolType.DelegateTable, (ulong)index); + + Symbols.Add((ulong)funcPtr.ToInt64(), info.Name); + + return Call(Const(funcPtr.ToInt64(), symbol), returnType, callArgs); + } + } + + public Operand GetLabel(ulong address) + { + if (!_labels.TryGetValue(address, out Operand label)) + { + label = Label(); + + _labels.Add(address, label); + } + + return label; + } + + public void MarkComparison(Operand n, Operand m) + { + _optOpLastCompare = CurrOp; + + _optCmpTempN = Copy(n); + _optCmpTempM = Copy(m); + } + + public void MarkFlagSet(PState stateFlag) + { + // Set this only if any of the NZCV flag bits were modified. + // This is used to ensure that when emitting a direct IL branch + // instruction for compare + branch sequences, we're not expecting + // to use comparison values from an old instruction, when in fact + // the flags were already overwritten by another instruction further along. 
+ if (stateFlag >= PState.VFlag) + { + _optOpLastFlagSet = CurrOp; + } + } + + private void ResetBlockState() + { + _optOpLastCompare = null; + _optOpLastFlagSet = null; + } + + public void SetPendingQcFlagSync() + { + _pendingQcFlagSync = true; + } + + public void SyncQcFlag() + { + if (_pendingQcFlagSync) + { + if (Optimizations.UseAdvSimd) + { + Operand fpsr = AddIntrinsicInt(Intrinsic.Arm64MrsFpsr); + + uint qcFlagMask = (uint)FPSR.Qc; + + Operand qcClearLabel = Label(); + + BranchIfFalse(qcClearLabel, BitwiseAnd(fpsr, Const(qcFlagMask))); + + AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0)); + InstEmitHelper.SetFpFlag(this, FPState.QcFlag, Const(1)); + + MarkLabel(qcClearLabel); + } + + _pendingQcFlagSync = false; + } + } + + public void ClearQcFlag() + { + if (Optimizations.UseAdvSimd) + { + AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0)); + } + } + + public void ClearQcFlagIfModified() + { + if (_pendingQcFlagSync && Optimizations.UseAdvSimd) + { + AddIntrinsicNoRet(Intrinsic.Arm64MsrFpsr, Const(0)); + } + } + + public void EnterArmFpMode() + { + InstEmitSimdHelper.EnterArmFpMode(this, InstEmitHelper.GetFpFlag); + } + + public void UpdateArmFpMode() + { + EnterArmFpMode(); + } + + public void ExitArmFpMode() + { + InstEmitSimdHelper.ExitArmFpMode(this, (flag, value) => InstEmitHelper.SetFpFlag(this, flag, value)); + } + + public Operand TryGetComparisonResult(Condition condition) + { + if (_optOpLastCompare == null || _optOpLastCompare != _optOpLastFlagSet) + { + return default; + } + + Operand n = _optCmpTempN; + Operand m = _optCmpTempM; + + InstName cmpName = _optOpLastCompare.Instruction.Name; + + if (cmpName == InstName.Subs) + { + switch (condition) + { + case Condition.Eq: return ICompareEqual (n, m); + case Condition.Ne: return ICompareNotEqual (n, m); + case Condition.GeUn: return ICompareGreaterOrEqualUI(n, m); + case Condition.LtUn: return ICompareLessUI (n, m); + case Condition.GtUn: return ICompareGreaterUI (n, m); + case Condition.LeUn: return ICompareLessOrEqualUI (n, m); + case Condition.Ge: return ICompareGreaterOrEqual (n, m); + case Condition.Lt: return ICompareLess (n, m); + case Condition.Gt: return ICompareGreater (n, m); + case Condition.Le: return ICompareLessOrEqual (n, m); + } + } + else if (cmpName == InstName.Adds && _optOpLastCompare is IOpCodeAluImm op) + { + // There are several limitations that need to be taken into account for CMN comparisons: + // - The unsigned comparisons are not valid, as they depend on the + // carry flag value, and they will have different values for addition and + // subtraction. For addition, it's carry, and for subtraction, it's borrow. + // So, we need to make sure we're not doing an unsigned compare for the CMN case. + // - We can only do the optimization for the immediate variants, + // because when the second operand value is exactly INT_MIN, we can't + // negate the value as there's no positive counterpart. + // Such invalid values can't be encoded in the immediate encodings. 
+ if (op.RegisterSize == RegisterSize.Int32) + { + m = Const((int)-op.Immediate); + } + else + { + m = Const(-op.Immediate); + } + + switch (condition) + { + case Condition.Eq: return ICompareEqual (n, m); + case Condition.Ne: return ICompareNotEqual (n, m); + case Condition.Ge: return ICompareGreaterOrEqual(n, m); + case Condition.Lt: return ICompareLess (n, m); + case Condition.Gt: return ICompareGreater (n, m); + case Condition.Le: return ICompareLessOrEqual (n, m); + } + } + + return default; + } + + public void SetIfThenBlockState(Condition[] state) + { + _ifThenBlockState = state; + _ifThenBlockStateIndex = 0; + } + + public void AdvanceIfThenBlockState() + { + if (IsInIfThenBlock) + { + _ifThenBlockStateIndex++; + } + } + } +}
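The INT_MIN caveat in the comment above is easy to demonstrate: two's complement has one more negative value than positive values, so negating the most negative 32-bit integer wraps back to itself, which is why a register-operand CMN cannot safely be rewritten as a compare against the negated operand. A tiny standalone check:

using System;

class NegateIntMin
{
    static void Main()
    {
        int m = int.MinValue;

        // -(-2^31) does not fit in 32 bits, so it wraps back to -2^31.
        Console.WriteLine(unchecked(-m) == int.MinValue); // True

        // Widened to 64 bits the negation is exact, which mirrors why the
        // immediate variant is safe: such a value cannot appear as an immediate.
        long imm = m;
        Console.WriteLine(-imm); // 2147483648
    }
}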
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/Cache/CacheEntry.cs b/src/ARMeilleure/Translation/Cache/CacheEntry.cs new file mode 100644 index 00000000..dc5503b1 --- /dev/null +++ b/src/ARMeilleure/Translation/Cache/CacheEntry.cs @@ -0,0 +1,26 @@ +using ARMeilleure.CodeGen.Unwinding; +using System; +using System.Diagnostics.CodeAnalysis; + +namespace ARMeilleure.Translation.Cache +{ + readonly struct CacheEntry : IComparable<CacheEntry> + { + public int Offset { get; } + public int Size { get; } + + public UnwindInfo UnwindInfo { get; } + + public CacheEntry(int offset, int size, UnwindInfo unwindInfo) + { + Offset = offset; + Size = size; + UnwindInfo = unwindInfo; + } + + public int CompareTo([AllowNull] CacheEntry other) + { + return Offset.CompareTo(other.Offset); + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs b/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs new file mode 100644 index 00000000..4c22de40 --- /dev/null +++ b/src/ARMeilleure/Translation/Cache/CacheMemoryAllocator.cs @@ -0,0 +1,96 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; + +namespace ARMeilleure.Translation.Cache +{ + class CacheMemoryAllocator + { + private readonly struct MemoryBlock : IComparable<MemoryBlock> + { + public int Offset { get; } + public int Size { get; } + + public MemoryBlock(int offset, int size) + { + Offset = offset; + Size = size; + } + + public int CompareTo([AllowNull] MemoryBlock other) + { + return Offset.CompareTo(other.Offset); + } + } + + private readonly List<MemoryBlock> _blocks = new List<MemoryBlock>(); + + public CacheMemoryAllocator(int capacity) + { + _blocks.Add(new MemoryBlock(0, capacity)); + } + + public int Allocate(int size) + { + for (int i = 0; i < _blocks.Count; i++) + { + MemoryBlock block = _blocks[i]; + + if (block.Size > size) + { + _blocks[i] = new MemoryBlock(block.Offset + size, block.Size - size); + return block.Offset; + } + else if (block.Size == size) + { + _blocks.RemoveAt(i); + return block.Offset; + } + } + + // We don't have enough free memory to perform the allocation. + return -1; + } + + public void Free(int offset, int size) + { + Insert(new MemoryBlock(offset, size)); + } + + private void Insert(MemoryBlock block) + { + int index = _blocks.BinarySearch(block); + + if (index < 0) + { + index = ~index; + } + + if (index < _blocks.Count) + { + MemoryBlock next = _blocks[index]; + + int endOffs = block.Offset + block.Size; + + if (next.Offset == endOffs) + { + block = new MemoryBlock(block.Offset, block.Size + next.Size); + _blocks.RemoveAt(index); + } + } + + if (index > 0) + { + MemoryBlock prev = _blocks[index - 1]; + + if (prev.Offset + prev.Size == block.Offset) + { + block = new MemoryBlock(block.Offset - prev.Size, block.Size + prev.Size); + _blocks.RemoveAt(--index); + } + } + + _blocks.Insert(index, block); + } + } +} diff --git a/src/ARMeilleure/Translation/Cache/JitCache.cs b/src/ARMeilleure/Translation/Cache/JitCache.cs new file mode 100644 index 00000000..f496a8e9 --- /dev/null +++ b/src/ARMeilleure/Translation/Cache/JitCache.cs @@ -0,0 +1,198 @@ +using ARMeilleure.CodeGen; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Memory; +using ARMeilleure.Native; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Translation.Cache +{ + static class JitCache + { + private const int PageSize = 4 * 1024; + private const int PageMask = PageSize - 1; + + private const int CodeAlignment = 4; // Bytes. 
+ private const int CacheSize = 2047 * 1024 * 1024; + + private static ReservedRegion _jitRegion; + private static JitCacheInvalidation _jitCacheInvalidator; + + private static CacheMemoryAllocator _cacheAllocator; + + private static readonly List<CacheEntry> _cacheEntries = new List<CacheEntry>(); + + private static readonly object _lock = new object(); + private static bool _initialized; + + public static void Initialize(IJitMemoryAllocator allocator) + { + if (_initialized) return; + + lock (_lock) + { + if (_initialized) return; + + _jitRegion = new ReservedRegion(allocator, CacheSize); + _jitCacheInvalidator = new JitCacheInvalidation(allocator); + + _cacheAllocator = new CacheMemoryAllocator(CacheSize); + + if (OperatingSystem.IsWindows()) + { + JitUnwindWindows.InstallFunctionTableHandler(_jitRegion.Pointer, CacheSize, _jitRegion.Pointer + Allocate(PageSize)); + } + + _initialized = true; + } + } + + public static IntPtr Map(CompiledFunction func) + { + byte[] code = func.Code; + + lock (_lock) + { + Debug.Assert(_initialized); + + int funcOffset = Allocate(code.Length); + + IntPtr funcPtr = _jitRegion.Pointer + funcOffset; + + if (OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + unsafe + { + fixed (byte *codePtr = code) + { + JitSupportDarwin.Copy(funcPtr, (IntPtr)codePtr, (ulong)code.Length); + } + } + } + else + { + ReprotectAsWritable(funcOffset, code.Length); + Marshal.Copy(code, 0, funcPtr, code.Length); + ReprotectAsExecutable(funcOffset, code.Length); + + _jitCacheInvalidator.Invalidate(funcPtr, (ulong)code.Length); + } + + Add(funcOffset, code.Length, func.UnwindInfo); + + return funcPtr; + } + } + + public static void Unmap(IntPtr pointer) + { + lock (_lock) + { + Debug.Assert(_initialized); + + int funcOffset = (int)(pointer.ToInt64() - _jitRegion.Pointer.ToInt64()); + + bool result = TryFind(funcOffset, out CacheEntry entry); + Debug.Assert(result); + + _cacheAllocator.Free(funcOffset, AlignCodeSize(entry.Size)); + + Remove(funcOffset); + } + } + + private static void ReprotectAsWritable(int offset, int size) + { + int endOffs = offset + size; + + int regionStart = offset & ~PageMask; + int regionEnd = (endOffs + PageMask) & ~PageMask; + + _jitRegion.Block.MapAsRwx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + } + + private static void ReprotectAsExecutable(int offset, int size) + { + int endOffs = offset + size; + + int regionStart = offset & ~PageMask; + int regionEnd = (endOffs + PageMask) & ~PageMask; + + _jitRegion.Block.MapAsRx((ulong)regionStart, (ulong)(regionEnd - regionStart)); + } + + private static int Allocate(int codeSize) + { + codeSize = AlignCodeSize(codeSize); + + int allocOffset = _cacheAllocator.Allocate(codeSize); + + if (allocOffset < 0) + { + throw new OutOfMemoryException("JIT Cache exhausted."); + } + + _jitRegion.ExpandIfNeeded((ulong)allocOffset + (ulong)codeSize); + + return allocOffset; + } + + private static int AlignCodeSize(int codeSize) + { + return checked(codeSize + (CodeAlignment - 1)) & ~(CodeAlignment - 1); + } + + private static void Add(int offset, int size, UnwindInfo unwindInfo) + { + CacheEntry entry = new CacheEntry(offset, size, unwindInfo); + + int index = _cacheEntries.BinarySearch(entry); + + if (index < 0) + { + index = ~index; + } + + _cacheEntries.Insert(index, entry); + } + + private static void Remove(int offset) + { + int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); + + if (index < 0) + { + index = ~index - 1; + } + + if (index >= 0) 
+ { + _cacheEntries.RemoveAt(index); + } + } + + public static bool TryFind(int offset, out CacheEntry entry) + { + lock (_lock) + { + int index = _cacheEntries.BinarySearch(new CacheEntry(offset, 0, default)); + + if (index < 0) + { + index = ~index - 1; + } + + if (index >= 0) + { + entry = _cacheEntries[index]; + return true; + } + } + + entry = default; + return false; + } + } +}
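CacheMemoryAllocator above is a first-fit free list keyed by offset: Allocate splits the first block that is large enough, and Free binary-searches the insertion point and coalesces with the neighbouring free blocks. A usage sketch (offsets only, no memory is touched; the type is internal, so this runs inside the assembly, or from Ryujinx.Tests via the InternalsVisibleTo attribute):

using ARMeilleure.Translation.Cache;

static class CacheAllocDemo
{
    static void Run()
    {
        var alloc = new CacheMemoryAllocator(4096);

        int a = alloc.Allocate(512); // 0: head of the single free block
        int b = alloc.Allocate(512); // 512: the free block keeps shrinking
        int c = alloc.Allocate(512); // 1024

        alloc.Free(a, 512);          // free list: [0, 512) and [1536, 4096)
        alloc.Free(c, 512);          // [1024, 1536) merges with [1536, 4096)
        alloc.Free(b, 512);          // [512, 1024) bridges both neighbours

        int all = alloc.Allocate(4096); // 0: everything coalesced back into one block
    }
}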
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs b/src/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs new file mode 100644 index 00000000..ec2ae73b --- /dev/null +++ b/src/ARMeilleure/Translation/Cache/JitCacheInvalidation.cs @@ -0,0 +1,79 @@ +using ARMeilleure.Memory; +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Translation.Cache +{ + class JitCacheInvalidation + { + private static int[] _invalidationCode = new int[] + { + unchecked((int)0xd53b0022), // mrs x2, ctr_el0 + unchecked((int)0xd3504c44), // ubfx x4, x2, #16, #4 + unchecked((int)0x52800083), // mov w3, #0x4 + unchecked((int)0x12000c45), // and w5, w2, #0xf + unchecked((int)0x1ac42064), // lsl w4, w3, w4 + unchecked((int)0x51000482), // sub w2, w4, #0x1 + unchecked((int)0x8a220002), // bic x2, x0, x2 + unchecked((int)0x1ac52063), // lsl w3, w3, w5 + unchecked((int)0xeb01005f), // cmp x2, x1 + unchecked((int)0x93407c84), // sxtw x4, w4 + unchecked((int)0x540000a2), // b.cs 3c <do_ic_clear> + unchecked((int)0xd50b7b22), // dc cvau, x2 + unchecked((int)0x8b040042), // add x2, x2, x4 + unchecked((int)0xeb02003f), // cmp x1, x2 + unchecked((int)0x54ffffa8), // b.hi 2c <dc_clear_loop> + unchecked((int)0xd5033b9f), // dsb ish + unchecked((int)0x51000462), // sub w2, w3, #0x1 + unchecked((int)0x93407c63), // sxtw x3, w3 + unchecked((int)0x8a220000), // bic x0, x0, x2 + unchecked((int)0xeb00003f), // cmp x1, x0 + unchecked((int)0x540000a9), // b.ls 64 <exit> + unchecked((int)0xd50b7520), // ic ivau, x0 + unchecked((int)0x8b030000), // add x0, x0, x3 + unchecked((int)0xeb00003f), // cmp x1, x0 + unchecked((int)0x54ffffa8), // b.hi 54 <ic_clear_loop> + unchecked((int)0xd5033b9f), // dsb ish + unchecked((int)0xd5033fdf), // isb + unchecked((int)0xd65f03c0), // ret + }; + + private delegate void InvalidateCache(ulong start, ulong end); + + private InvalidateCache _invalidateCache; + private ReservedRegion _invalidateCacheCodeRegion; + + private readonly bool _needsInvalidation; + + public JitCacheInvalidation(IJitMemoryAllocator allocator) + { + // On macOS, a different path is used to write to the JIT cache, which does the invalidation. + if (!OperatingSystem.IsMacOS() && RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + ulong size = (ulong)_invalidationCode.Length * sizeof(int); + ulong mask = (ulong)ReservedRegion.DefaultGranularity - 1; + + size = (size + mask) & ~mask; + + _invalidateCacheCodeRegion = new ReservedRegion(allocator, size); + _invalidateCacheCodeRegion.ExpandIfNeeded(size); + + Marshal.Copy(_invalidationCode, 0, _invalidateCacheCodeRegion.Pointer, _invalidationCode.Length); + + _invalidateCacheCodeRegion.Block.MapAsRx(0, size); + + _invalidateCache = Marshal.GetDelegateForFunctionPointer<InvalidateCache>(_invalidateCacheCodeRegion.Pointer); + + _needsInvalidation = true; + } + } + + public void Invalidate(IntPtr basePointer, ulong size) + { + if (_needsInvalidation) + { + _invalidateCache((ulong)basePointer, (ulong)basePointer + size); + } + } + } +}
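The machine code above is the conventional AArch64 self-modifying-code flush: read CTR_EL0, derive the data and instruction cache line sizes from its DminLine (bits 19:16) and IminLine (bits 3:0) fields, clean the data cache by line (dc cvau), then invalidate the instruction cache by line (ic ivau), with dsb/isb barriers in between. Each field stores log2 of the line size in 4-byte words, which is what the ubfx/and/lsl prologue decodes. The same decode in C# (the CTR_EL0 value is illustrative, not read from hardware):

using System;

static class CtrEl0Demo
{
    static void Main()
    {
        uint ctr = 0x8444C004; // Illustrative register value only.

        uint dMinLine = (ctr >> 16) & 0xF; // ubfx x4, x2, #16, #4
        uint iMinLine = ctr & 0xF;         // and  w5, w2, #0xf

        uint dLineBytes = 4u << (int)dMinLine; // lsl w4, w3, w4  (w3 = 4 bytes per word)
        uint iLineBytes = 4u << (int)iMinLine; // lsl w3, w3, w5

        Console.WriteLine($"D line: {dLineBytes} bytes, I line: {iLineBytes} bytes"); // 64 and 64
    }
}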
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs b/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs new file mode 100644 index 00000000..77727bf1 --- /dev/null +++ b/src/ARMeilleure/Translation/Cache/JitUnwindWindows.cs @@ -0,0 +1,189 @@ +// https://github.com/MicrosoftDocs/cpp-docs/blob/master/docs/build/exception-handling-x64.md + +using ARMeilleure.CodeGen.Unwinding; +using System; +using System.Diagnostics; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Translation.Cache +{ + static partial class JitUnwindWindows + { + private const int MaxUnwindCodesArraySize = 32; // Must be an even value. + + private struct RuntimeFunction + { + public uint BeginAddress; + public uint EndAddress; + public uint UnwindData; + } + + private struct UnwindInfo + { + public byte VersionAndFlags; + public byte SizeOfProlog; + public byte CountOfUnwindCodes; + public byte FrameRegister; + public unsafe fixed ushort UnwindCodes[MaxUnwindCodesArraySize]; + } + + private enum UnwindOp + { + PushNonvol = 0, + AllocLarge = 1, + AllocSmall = 2, + SetFpreg = 3, + SaveNonvol = 4, + SaveNonvolFar = 5, + SaveXmm128 = 8, + SaveXmm128Far = 9, + PushMachframe = 10 + } + + private unsafe delegate RuntimeFunction* GetRuntimeFunctionCallback(ulong controlPc, IntPtr context); + + [LibraryImport("kernel32.dll")] + [return: MarshalAs(UnmanagedType.Bool)] + private static unsafe partial bool RtlInstallFunctionTableCallback( + ulong tableIdentifier, + ulong baseAddress, + uint length, + GetRuntimeFunctionCallback callback, + IntPtr context, + [MarshalAs(UnmanagedType.LPWStr)] string outOfProcessCallbackDll); + + private static GetRuntimeFunctionCallback _getRuntimeFunctionCallback; + + private static int _sizeOfRuntimeFunction; + + private unsafe static RuntimeFunction* _runtimeFunction; + + private unsafe static UnwindInfo* _unwindInfo; + + public static void InstallFunctionTableHandler(IntPtr codeCachePointer, uint codeCacheLength, IntPtr workBufferPtr) + { + ulong codeCachePtr = (ulong)codeCachePointer.ToInt64(); + + _sizeOfRuntimeFunction = Marshal.SizeOf<RuntimeFunction>(); + + bool result; + + unsafe + { + _runtimeFunction = (RuntimeFunction*)workBufferPtr; + + _unwindInfo = (UnwindInfo*)(workBufferPtr + _sizeOfRuntimeFunction); + + _getRuntimeFunctionCallback = new GetRuntimeFunctionCallback(FunctionTableHandler); + + result = RtlInstallFunctionTableCallback( + codeCachePtr | 3, + codeCachePtr, + codeCacheLength, + _getRuntimeFunctionCallback, + codeCachePointer, + null); + } + + if (!result) + { + throw new InvalidOperationException("Failure installing function table callback."); + } + } + + private static unsafe RuntimeFunction* FunctionTableHandler(ulong controlPc, IntPtr context) + { + int offset = (int)((long)controlPc - context.ToInt64()); + + if (!JitCache.TryFind(offset, out CacheEntry funcEntry)) + { + return null; // Not found. 
+ } + + var unwindInfo = funcEntry.UnwindInfo; + + int codeIndex = 0; + + for (int index = unwindInfo.PushEntries.Length - 1; index >= 0; index--) + { + var entry = unwindInfo.PushEntries[index]; + + switch (entry.PseudoOp) + { + case UnwindPseudoOp.SaveXmm128: + { + int stackOffset = entry.StackOffsetOrAllocSize; + + Debug.Assert(stackOffset % 16 == 0); + + if (stackOffset <= 0xFFFF0) + { + _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128, entry.PrologOffset, entry.RegIndex); + _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset / 16); + } + else + { + _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.SaveXmm128Far, entry.PrologOffset, entry.RegIndex); + _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 0); + _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(stackOffset >> 16); + } + + break; + } + + case UnwindPseudoOp.AllocStack: + { + int allocSize = entry.StackOffsetOrAllocSize; + + Debug.Assert(allocSize % 8 == 0); + + if (allocSize <= 128) + { + _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocSmall, entry.PrologOffset, (allocSize / 8) - 1); + } + else if (allocSize <= 0x7FFF8) + { + _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 0); + _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize / 8); + } + else + { + _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.AllocLarge, entry.PrologOffset, 1); + _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 0); + _unwindInfo->UnwindCodes[codeIndex++] = (ushort)(allocSize >> 16); + } + + break; + } + + case UnwindPseudoOp.PushReg: + { + _unwindInfo->UnwindCodes[codeIndex++] = PackUnwindOp(UnwindOp.PushNonvol, entry.PrologOffset, entry.RegIndex); + + break; + } + + default: throw new NotImplementedException($"({nameof(entry.PseudoOp)} = {entry.PseudoOp})"); + } + } + + Debug.Assert(codeIndex <= MaxUnwindCodesArraySize); + + _unwindInfo->VersionAndFlags = 1; // Flags: The function has no handler. + _unwindInfo->SizeOfProlog = (byte)unwindInfo.PrologSize; + _unwindInfo->CountOfUnwindCodes = (byte)codeIndex; + _unwindInfo->FrameRegister = 0; + + _runtimeFunction->BeginAddress = (uint)funcEntry.Offset; + _runtimeFunction->EndAddress = (uint)(funcEntry.Offset + funcEntry.Size); + _runtimeFunction->UnwindData = (uint)_sizeOfRuntimeFunction; + + return _runtimeFunction; + } + + private static ushort PackUnwindOp(UnwindOp op, int prologOffset, int opInfo) + { + return (ushort)(prologOffset | ((int)op << 8) | (opInfo << 12)); + } + } +}
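PackUnwindOp mirrors the 16-bit UNWIND_CODE slot layout from the linked Microsoft documentation: the prolog offset occupies the low byte, the unwind operation bits 8-11, and the operation info bits 12-15, with larger payloads spilling into the following slots as the SaveXmm128Far and AllocLarge cases show. A quick check of the packing:

using System;

static class UnwindPackDemo
{
    // UNWIND_CODE: CodeOffset (8 bits) | UnwindOp (4 bits) | OpInfo (4 bits).
    static ushort Pack(int prologOffset, int op, int opInfo)
        => (ushort)(prologOffset | (op << 8) | (opInfo << 12));

    static void Main()
    {
        // UWOP_PUSH_NONVOL (op 0) of register 5 (RBP) at prolog offset 0x04.
        Console.WriteLine($"0x{Pack(0x04, 0, 5):X4}"); // 0x5004
    }
}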
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/Compiler.cs b/src/ARMeilleure/Translation/Compiler.cs new file mode 100644 index 00000000..d4aa5cd9 --- /dev/null +++ b/src/ARMeilleure/Translation/Compiler.cs @@ -0,0 +1,68 @@ +using ARMeilleure.CodeGen; +using ARMeilleure.CodeGen.Optimizations; +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Translation +{ + static class Compiler + { + public static CompiledFunction Compile( + ControlFlowGraph cfg, + OperandType[] argTypes, + OperandType retType, + CompilerOptions options, + Architecture target) + { + CompilerContext cctx = new(cfg, argTypes, retType, options); + + if (options.HasFlag(CompilerOptions.Optimize)) + { + Logger.StartPass(PassName.TailMerge); + + TailMerge.RunPass(cctx); + + Logger.EndPass(PassName.TailMerge, cfg); + } + + if (options.HasFlag(CompilerOptions.SsaForm)) + { + Logger.StartPass(PassName.Dominance); + + Dominance.FindDominators(cfg); + Dominance.FindDominanceFrontiers(cfg); + + Logger.EndPass(PassName.Dominance); + + Logger.StartPass(PassName.SsaConstruction); + + Ssa.Construct(cfg); + + Logger.EndPass(PassName.SsaConstruction, cfg); + } + else + { + Logger.StartPass(PassName.RegisterToLocal); + + RegisterToLocal.Rename(cfg); + + Logger.EndPass(PassName.RegisterToLocal, cfg); + } + + if (target == Architecture.X64) + { + return CodeGen.X86.CodeGenerator.Generate(cctx); + } + else if (target == Architecture.Arm64) + { + return CodeGen.Arm64.CodeGenerator.Generate(cctx); + } + else + { + throw new NotImplementedException(target.ToString()); + } + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/CompilerContext.cs b/src/ARMeilleure/Translation/CompilerContext.cs new file mode 100644 index 00000000..510dec58 --- /dev/null +++ b/src/ARMeilleure/Translation/CompilerContext.cs @@ -0,0 +1,26 @@ +using ARMeilleure.IntermediateRepresentation; + +namespace ARMeilleure.Translation +{ + readonly struct CompilerContext + { + public ControlFlowGraph Cfg { get; } + + public OperandType[] FuncArgTypes { get; } + public OperandType FuncReturnType { get; } + + public CompilerOptions Options { get; } + + public CompilerContext( + ControlFlowGraph cfg, + OperandType[] funcArgTypes, + OperandType funcReturnType, + CompilerOptions options) + { + Cfg = cfg; + FuncArgTypes = funcArgTypes; + FuncReturnType = funcReturnType; + Options = options; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/CompilerOptions.cs b/src/ARMeilleure/Translation/CompilerOptions.cs new file mode 100644 index 00000000..0a07ed4a --- /dev/null +++ b/src/ARMeilleure/Translation/CompilerOptions.cs @@ -0,0 +1,17 @@ +using System; + +namespace ARMeilleure.Translation +{ + [Flags] + enum CompilerOptions + { + None = 0, + SsaForm = 1 << 0, + Optimize = 1 << 1, + Lsra = 1 << 2, + Relocatable = 1 << 3, + + MediumCq = SsaForm | Optimize, + HighCq = SsaForm | Optimize | Lsra + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/ControlFlowGraph.cs b/src/ARMeilleure/Translation/ControlFlowGraph.cs new file mode 100644 index 00000000..c935f152 --- /dev/null +++ b/src/ARMeilleure/Translation/ControlFlowGraph.cs @@ -0,0 +1,155 @@ +using ARMeilleure.IntermediateRepresentation; +using System; +using System.Collections.Generic; +using System.Diagnostics; + +namespace ARMeilleure.Translation +{ + class ControlFlowGraph + { + private BasicBlock[] _postOrderBlocks; + private int[] _postOrderMap; + + public int LocalsCount { get; private set; } + public BasicBlock Entry { get; } + public IntrusiveList<BasicBlock> Blocks { get; } + public BasicBlock[] PostOrderBlocks => _postOrderBlocks; + public int[] PostOrderMap => _postOrderMap; + + public ControlFlowGraph(BasicBlock entry, IntrusiveList<BasicBlock> blocks, int localsCount) + { + Entry = entry; + Blocks = blocks; + LocalsCount = localsCount; + + Update(); + } + + public Operand AllocateLocal(OperandType type) + { + Operand result = Operand.Factory.Local(type); + + result.NumberLocal(++LocalsCount); + + return result; + } + + public void Update() + { + RemoveUnreachableBlocks(Blocks); + + var visited = new HashSet<BasicBlock>(); + var blockStack = new Stack<BasicBlock>(); + + Array.Resize(ref _postOrderBlocks, Blocks.Count); + Array.Resize(ref _postOrderMap, Blocks.Count); + + visited.Add(Entry); + blockStack.Push(Entry); + + int index = 0; + + while (blockStack.TryPop(out BasicBlock block)) + { + bool visitedNew = false; + + for (int i = 0; i < block.SuccessorsCount; i++) + { + BasicBlock succ = block.GetSuccessor(i); + + if (visited.Add(succ)) + { + blockStack.Push(block); + blockStack.Push(succ); + + visitedNew = true; + + break; + } + } + + if (!visitedNew) + { + PostOrderMap[block.Index] = index; + + PostOrderBlocks[index++] = block; + } + } + } + + private void RemoveUnreachableBlocks(IntrusiveList<BasicBlock> blocks) + { + var visited = new HashSet<BasicBlock>(); + var workQueue = new Queue<BasicBlock>(); + + visited.Add(Entry); + workQueue.Enqueue(Entry); + + while (workQueue.TryDequeue(out BasicBlock block)) + { + Debug.Assert(block.Index != -1, "Invalid block index."); + + for (int i = 0; i < block.SuccessorsCount; i++) + { + BasicBlock succ = block.GetSuccessor(i); + + if (visited.Add(succ)) + { + workQueue.Enqueue(succ); + } + } + } + + if (visited.Count < blocks.Count) + { + // Remove unreachable blocks and renumber. + int index = 0; + + for (BasicBlock block = blocks.First; block != null;) + { + BasicBlock nextBlock = block.ListNext; + + if (!visited.Contains(block)) + { + while (block.SuccessorsCount > 0) + { + block.RemoveSuccessor(index: block.SuccessorsCount - 1); + } + + blocks.Remove(block); + } + else + { + block.Index = index++; + } + + block = nextBlock; + } + } + } + + public BasicBlock SplitEdge(BasicBlock predecessor, BasicBlock successor) + { + BasicBlock splitBlock = new BasicBlock(Blocks.Count); + + for (int i = 0; i < predecessor.SuccessorsCount; i++) + { + if (predecessor.GetSuccessor(i) == successor) + { + predecessor.SetSuccessor(i, splitBlock); + } + } + + if (splitBlock.Predecessors.Count == 0) + { + throw new ArgumentException("Predecessor and successor are not connected."); + } + + splitBlock.AddSuccessor(successor); + + Blocks.AddBefore(successor, splitBlock); + + return splitBlock; + } + } +}
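Update above computes a post-order numbering iteratively (a block is emitted only after all of its reachable successors), and PostOrderMap maps a block index back to its post-order position. Passes that want reverse post-order, where every block is visited before its successors along forward edges, simply walk PostOrderBlocks backwards; a sketch of that consumer-side loop (ProcessBlock is a hypothetical stand-in for a real pass):

// Inside the assembly, given a ControlFlowGraph cfg:
for (int i = cfg.PostOrderBlocks.Length - 1; i >= 0; i--)
{
    BasicBlock block = cfg.PostOrderBlocks[i];

    // In reverse post-order, forward-edge predecessors of 'block' have already
    // been processed, which helps forward dataflow analyses converge quickly.
    ProcessBlock(block); // Hypothetical visitor.
}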
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/DelegateHelper.cs b/src/ARMeilleure/Translation/DelegateHelper.cs new file mode 100644 index 00000000..43a39bab --- /dev/null +++ b/src/ARMeilleure/Translation/DelegateHelper.cs @@ -0,0 +1,104 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Reflection; +using System.Reflection.Emit; + +namespace ARMeilleure.Translation +{ + static class DelegateHelper + { + private const string DelegateTypesAssemblyName = "JitDelegateTypes"; + + private static readonly ModuleBuilder _modBuilder; + + private static readonly Dictionary<string, Type> _delegateTypesCache; + + static DelegateHelper() + { + AssemblyBuilder asmBuilder = AssemblyBuilder.DefineDynamicAssembly(new AssemblyName(DelegateTypesAssemblyName), AssemblyBuilderAccess.Run); + + _modBuilder = asmBuilder.DefineDynamicModule(DelegateTypesAssemblyName); + + _delegateTypesCache = new Dictionary<string, Type>(); + } + + public static Delegate GetDelegate(MethodInfo info) + { + ArgumentNullException.ThrowIfNull(info); + + Type[] parameters = info.GetParameters().Select(pI => pI.ParameterType).ToArray(); + Type returnType = info.ReturnType; + + Type delegateType = GetDelegateType(parameters, returnType); + + return Delegate.CreateDelegate(delegateType, info); + } + + private static Type GetDelegateType(Type[] parameters, Type returnType) + { + string key = GetFunctionSignatureKey(parameters, returnType); + + if (!_delegateTypesCache.TryGetValue(key, out Type delegateType)) + { + delegateType = MakeDelegateType(parameters, returnType, key); + + _delegateTypesCache.TryAdd(key, delegateType); + } + + return delegateType; + } + + private static string GetFunctionSignatureKey(Type[] parameters, Type returnType) + { + string sig = GetTypeName(returnType); + + foreach (Type type in parameters) + { + sig += '_' + GetTypeName(type); + } + + return sig; + } + + private static string GetTypeName(Type type) + { + return type.FullName.Replace(".", string.Empty); + } + + private const MethodAttributes CtorAttributes = + MethodAttributes.RTSpecialName | + MethodAttributes.HideBySig | + MethodAttributes.Public; + + private const TypeAttributes DelegateTypeAttributes = + TypeAttributes.Class | + TypeAttributes.Public | + TypeAttributes.Sealed | + TypeAttributes.AnsiClass | + TypeAttributes.AutoClass; + + private const MethodImplAttributes ImplAttributes = + MethodImplAttributes.Runtime | + MethodImplAttributes.Managed; + + private const MethodAttributes InvokeAttributes = + MethodAttributes.Public | + MethodAttributes.HideBySig | + MethodAttributes.NewSlot | + MethodAttributes.Virtual; + + private static readonly Type[] _delegateCtorSignature = { typeof(object), typeof(IntPtr) }; + + private static Type MakeDelegateType(Type[] parameters, Type returnType, string name) + { + TypeBuilder builder = _modBuilder.DefineType(name, DelegateTypeAttributes, typeof(MulticastDelegate)); + + builder.DefineConstructor(CtorAttributes, CallingConventions.Standard, _delegateCtorSignature).SetImplementationFlags(ImplAttributes); + + builder.DefineMethod("Invoke", InvokeAttributes, returnType, parameters).SetImplementationFlags(ImplAttributes); + + return builder.CreateTypeInfo(); + } + } +} diff --git a/src/ARMeilleure/Translation/DelegateInfo.cs b/src/ARMeilleure/Translation/DelegateInfo.cs new file mode 100644 index 00000000..36320ac3 --- /dev/null +++ b/src/ARMeilleure/Translation/DelegateInfo.cs @@ -0,0 +1,19 @@ +using System; +using System.Runtime.InteropServices; + 
+namespace ARMeilleure.Translation +{ + class DelegateInfo + { + private readonly Delegate _dlg; // Ensure that this delegate will not be garbage collected. + + public IntPtr FuncPtr { get; } + + public DelegateInfo(Delegate dlg) + { + _dlg = dlg; + + FuncPtr = Marshal.GetFunctionPointerForDelegate<Delegate>(dlg); + } + } +} diff --git a/src/ARMeilleure/Translation/Delegates.cs b/src/ARMeilleure/Translation/Delegates.cs new file mode 100644 index 00000000..55f1e514 --- /dev/null +++ b/src/ARMeilleure/Translation/Delegates.cs @@ -0,0 +1,261 @@ +using ARMeilleure.Instructions; +using System; +using System.Collections.Generic; +using System.Reflection; + +namespace ARMeilleure.Translation +{ + static class Delegates + { + public static bool TryGetDelegateFuncPtrByIndex(int index, out IntPtr funcPtr) + { + if (index >= 0 && index < _delegates.Count) + { + funcPtr = _delegates.Values[index].FuncPtr; // O(1). + + return true; + } + else + { + funcPtr = default; + + return false; + } + } + + public static IntPtr GetDelegateFuncPtrByIndex(int index) + { + if (index < 0 || index >= _delegates.Count) + { + throw new ArgumentOutOfRangeException($"({nameof(index)} = {index})"); + } + + return _delegates.Values[index].FuncPtr; // O(1). + } + + public static IntPtr GetDelegateFuncPtr(MethodInfo info) + { + ArgumentNullException.ThrowIfNull(info); + + string key = GetKey(info); + + if (!_delegates.TryGetValue(key, out DelegateInfo dlgInfo)) // O(log(n)). + { + throw new KeyNotFoundException($"({nameof(key)} = {key})"); + } + + return dlgInfo.FuncPtr; + } + + public static int GetDelegateIndex(MethodInfo info) + { + ArgumentNullException.ThrowIfNull(info); + + string key = GetKey(info); + + int index = _delegates.IndexOfKey(key); // O(log(n)). + + if (index == -1) + { + throw new KeyNotFoundException($"({nameof(key)} = {key})"); + } + + return index; + } + + private static void SetDelegateInfo(MethodInfo info) + { + string key = GetKey(info); + + Delegate dlg = DelegateHelper.GetDelegate(info); + + _delegates.Add(key, new DelegateInfo(dlg)); // ArgumentException (key). 
+ } + + private static string GetKey(MethodInfo info) + { + return $"{info.DeclaringType.Name}.{info.Name}"; + } + + private static readonly SortedList<string, DelegateInfo> _delegates; + + static Delegates() + { + _delegates = new SortedList<string, DelegateInfo>(); + + SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Abs), new Type[] { typeof(double) })); + SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Ceiling), new Type[] { typeof(double) })); + SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Floor), new Type[] { typeof(double) })); + SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Round), new Type[] { typeof(double), typeof(MidpointRounding) })); + SetDelegateInfo(typeof(Math).GetMethod(nameof(Math.Truncate), new Type[] { typeof(double) })); + + SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Abs), new Type[] { typeof(float) })); + SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Ceiling), new Type[] { typeof(float) })); + SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Floor), new Type[] { typeof(float) })); + SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Round), new Type[] { typeof(float), typeof(MidpointRounding) })); + SetDelegateInfo(typeof(MathF).GetMethod(nameof(MathF.Truncate), new Type[] { typeof(float) })); + + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.Break))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntfrqEl0))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntpctEl0))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCntvctEl0))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetCtrEl0))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetDczidEl0))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.InvalidateCacheLine))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadByte))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt16))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt32))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadUInt64))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ReadVector128))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SignalMemoryTracking))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.SupervisorCall))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.ThrowInvalidMemoryAccess))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.Undefined))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteByte))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt16))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt32))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteUInt64))); + SetDelegateInfo(typeof(NativeInterface).GetMethod(nameof(NativeInterface.WriteVector128))); + + 
SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingSigns))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.CountLeadingZeros))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32b))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32cb))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32ch))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32cw))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32cx))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32h))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32w))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Crc32x))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Decrypt))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Encrypt))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.FixedRotate))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashChoose))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashLower))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashMajority))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashParity))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.HashUpper))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.InverseMixColumns))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.MixColumns))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.PolynomialMult64_128))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart1))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha1SchedulePart2))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart1))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Sha256SchedulePart2))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.SignedShrImm64))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl1))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl2))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl3))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbl4))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx1))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx2))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx3))); + SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.Tbx4))); + 
SetDelegateInfo(typeof(SoftFallback).GetMethod(nameof(SoftFallback.UnsignedShrImm64))); + + SetDelegateInfo(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert))); + SetDelegateInfo(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert))); + + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAdd))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPAddFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompare))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareEQ))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareEQFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareGE))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareGEFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareGT))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareGTFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareLE))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareLEFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareLT))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPCompareLTFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPDiv))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMax))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNum))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMaxNumFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMin))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNum))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMinNumFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMul))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulAdd))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulAddFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulSub))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulSubFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPMulX))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPNegMulAdd))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPNegMulSub))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecipEstimate))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecipEstimateFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecipStep))); // A32 only. 
+ SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecipStepFused))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRecpX))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRSqrtEstimate))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRSqrtEstimateFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRSqrtStep))); // A32 only. + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPRSqrtStepFused))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPSqrt))); + SetDelegateInfo(typeof(SoftFloat32).GetMethod(nameof(SoftFloat32.FPSub))); + + SetDelegateInfo(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert))); + + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPAdd))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPAddFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompare))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareEQ))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareEQFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareGE))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareGEFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareGT))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareGTFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareLE))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareLEFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareLT))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPCompareLTFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPDiv))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMax))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMaxFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMaxNum))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMaxNumFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMin))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMinFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMinNum))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMinNumFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMul))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulAdd))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulAddFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulSub))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulSubFpscr))); // A32 only. 
+ SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPMulX))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPNegMulAdd))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPNegMulSub))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecipEstimate))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecipEstimateFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecipStep))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecipStepFused))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRecpX))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtEstimate))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtEstimateFpscr))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtStep))); // A32 only. + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPRSqrtStepFused))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSqrt))); + SetDelegateInfo(typeof(SoftFloat64).GetMethod(nameof(SoftFloat64.FPSub))); + + SetDelegateInfo(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert))); + } + } +} diff --git a/src/ARMeilleure/Translation/DispatcherFunction.cs b/src/ARMeilleure/Translation/DispatcherFunction.cs new file mode 100644 index 00000000..7d5a3388 --- /dev/null +++ b/src/ARMeilleure/Translation/DispatcherFunction.cs @@ -0,0 +1,7 @@ +using System; + +namespace ARMeilleure.Translation +{ + delegate void DispatcherFunction(IntPtr nativeContext, ulong startAddress); + delegate ulong WrapperFunction(IntPtr nativeContext, ulong startAddress); +} diff --git a/src/ARMeilleure/Translation/Dominance.cs b/src/ARMeilleure/Translation/Dominance.cs new file mode 100644 index 00000000..b9b961d1 --- /dev/null +++ b/src/ARMeilleure/Translation/Dominance.cs @@ -0,0 +1,95 @@ +using ARMeilleure.IntermediateRepresentation; +using System.Diagnostics; + +namespace ARMeilleure.Translation +{ + static class Dominance + { + // Those methods are an implementation of the algorithms on "A Simple, Fast Dominance Algorithm". 
+ // https://www.cs.rice.edu/~keith/EMBED/dom.pdf + public static void FindDominators(ControlFlowGraph cfg) + { + BasicBlock Intersect(BasicBlock block1, BasicBlock block2) + { + while (block1 != block2) + { + while (cfg.PostOrderMap[block1.Index] < cfg.PostOrderMap[block2.Index]) + { + block1 = block1.ImmediateDominator; + } + + while (cfg.PostOrderMap[block2.Index] < cfg.PostOrderMap[block1.Index]) + { + block2 = block2.ImmediateDominator; + } + } + + return block1; + } + + cfg.Entry.ImmediateDominator = cfg.Entry; + + Debug.Assert(cfg.Entry == cfg.PostOrderBlocks[cfg.PostOrderBlocks.Length - 1]); + + bool modified; + + do + { + modified = false; + + for (int blkIndex = cfg.PostOrderBlocks.Length - 2; blkIndex >= 0; blkIndex--) + { + BasicBlock block = cfg.PostOrderBlocks[blkIndex]; + + BasicBlock newIDom = null; + + foreach (BasicBlock predecessor in block.Predecessors) + { + if (predecessor.ImmediateDominator != null) + { + if (newIDom != null) + { + newIDom = Intersect(predecessor, newIDom); + } + else + { + newIDom = predecessor; + } + } + } + + if (block.ImmediateDominator != newIDom) + { + block.ImmediateDominator = newIDom; + + modified = true; + } + } + } + while (modified); + } + + public static void FindDominanceFrontiers(ControlFlowGraph cfg) + { + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + if (block.Predecessors.Count < 2) + { + continue; + } + + for (int pBlkIndex = 0; pBlkIndex < block.Predecessors.Count; pBlkIndex++) + { + BasicBlock current = block.Predecessors[pBlkIndex]; + + while (current != block.ImmediateDominator) + { + current.DominanceFrontiers.Add(block); + + current = current.ImmediateDominator; + } + } + } + } + } +}
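To make the fixpoint above concrete, here is a minimal, self-contained restatement of the same Cooper-Harvey-Kennedy algorithm over plain arrays. This is an illustrative sketch, not ARMeilleure code: blocks are identified by their post-order number, with the entry block holding the highest number, mirroring cfg.PostOrderBlocks above.

using System;

static class DominatorsSketch
{
    // idom[b] holds the post-order number of b's immediate dominator.
    // predecessors[b] lists the post-order numbers of b's predecessors.
    public static int[] FindDominators(int[][] predecessors)
    {
        const int Undefined = -1;

        int count = predecessors.Length;
        int entry = count - 1; // Entry block has the highest post-order number.

        int[] idom = new int[count];
        Array.Fill(idom, Undefined);
        idom[entry] = entry; // The entry block dominates itself.

        int Intersect(int b1, int b2)
        {
            // Walk both candidates up the dominator tree until they meet;
            // the lower post-order number is always the deeper block.
            while (b1 != b2)
            {
                while (b1 < b2) b1 = idom[b1];
                while (b2 < b1) b2 = idom[b2];
            }

            return b1;
        }

        bool modified;

        do
        {
            modified = false;

            // Reverse post-order, skipping the entry block.
            for (int b = count - 2; b >= 0; b--)
            {
                int newIdom = Undefined;

                foreach (int p in predecessors[b])
                {
                    if (idom[p] == Undefined)
                    {
                        continue; // Predecessor not processed yet.
                    }

                    newIdom = newIdom == Undefined ? p : Intersect(p, newIdom);
                }

                if (idom[b] != newIdom)
                {
                    idom[b] = newIdom;
                    modified = true;
                }
            }
        }
        while (modified);

        return idom;
    }
}

For a diamond CFG (entry 3 branching to blocks 2 and 1, both joining at block 0), this yields idom[2] = idom[1] = idom[0] = 3; FindDominanceFrontiers then walks each predecessor of the join block 0 up to that common dominator, adding 0 to the dominance frontiers of both 2 and 1.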
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/EmitterContext.cs b/src/ARMeilleure/Translation/EmitterContext.cs new file mode 100644 index 00000000..8fcb4dee --- /dev/null +++ b/src/ARMeilleure/Translation/EmitterContext.cs @@ -0,0 +1,680 @@ +using ARMeilleure.Diagnostics; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using System; +using System.Collections.Generic; +using System.Reflection; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Translation +{ + class EmitterContext + { + private int _localsCount; + + private readonly Dictionary<Operand, BasicBlock> _irLabels; + private readonly IntrusiveList<BasicBlock> _irBlocks; + + private BasicBlock _irBlock; + private BasicBlock _ifBlock; + + private bool _needsNewBlock; + private BasicBlockFrequency _nextBlockFreq; + + public EmitterContext() + { + _localsCount = 0; + + _irLabels = new Dictionary<Operand, BasicBlock>(); + _irBlocks = new IntrusiveList<BasicBlock>(); + + _needsNewBlock = true; + _nextBlockFreq = BasicBlockFrequency.Default; + } + + public Operand AllocateLocal(OperandType type) + { + Operand local = Local(type); + + local.NumberLocal(++_localsCount); + + return local; + } + + public Operand Add(Operand op1, Operand op2) + { + return Add(Instruction.Add, Local(op1.Type), op1, op2); + } + + public Operand BitwiseAnd(Operand op1, Operand op2) + { + return Add(Instruction.BitwiseAnd, Local(op1.Type), op1, op2); + } + + public Operand BitwiseExclusiveOr(Operand op1, Operand op2) + { + return Add(Instruction.BitwiseExclusiveOr, Local(op1.Type), op1, op2); + } + + public Operand BitwiseNot(Operand op1) + { + return Add(Instruction.BitwiseNot, Local(op1.Type), op1); + } + + public Operand BitwiseOr(Operand op1, Operand op2) + { + return Add(Instruction.BitwiseOr, Local(op1.Type), op1, op2); + } + + public void Branch(Operand label) + { + NewNextBlockIfNeeded(); + + BranchToLabel(label, uncond: true, BasicBlockFrequency.Default); + } + + public void BranchIf(Operand label, Operand op1, Operand op2, Comparison comp, BasicBlockFrequency falseFreq = default) + { + Add(Instruction.BranchIf, default, op1, op2, Const((int)comp)); + + BranchToLabel(label, uncond: false, falseFreq); + } + + public void BranchIfFalse(Operand label, Operand op1, BasicBlockFrequency falseFreq = default) + { + BranchIf(label, op1, Const(op1.Type, 0), Comparison.Equal, falseFreq); + } + + public void BranchIfTrue(Operand label, Operand op1, BasicBlockFrequency falseFreq = default) + { + BranchIf(label, op1, Const(op1.Type, 0), Comparison.NotEqual, falseFreq); + } + + public Operand ByteSwap(Operand op1) + { + return Add(Instruction.ByteSwap, Local(op1.Type), op1); + } + + public virtual Operand Call(MethodInfo info, params Operand[] callArgs) + { + IntPtr funcPtr = Delegates.GetDelegateFuncPtr(info); + + OperandType returnType = GetOperandType(info.ReturnType); + + Symbols.Add((ulong)funcPtr.ToInt64(), info.Name); + + return Call(Const(funcPtr.ToInt64()), returnType, callArgs); + } + + protected static OperandType GetOperandType(Type type) + { + if (type == typeof(bool) || type == typeof(byte) || + type == typeof(char) || type == typeof(short) || + type == typeof(int) || type == typeof(sbyte) || + type == typeof(ushort) || type == typeof(uint)) + { + return OperandType.I32; + } + else if (type == typeof(long) || type == typeof(ulong)) + { + return OperandType.I64; + } + else if (type == typeof(double)) + { + return OperandType.FP64; + } + else if (type == 
typeof(float)) + { + return OperandType.FP32; + } + else if (type == typeof(V128)) + { + return OperandType.V128; + } + else if (type == typeof(void)) + { + return OperandType.None; + } + else + { + throw new ArgumentException($"Invalid type \"{type.Name}\"."); + } + } + + public Operand Call(Operand address, OperandType returnType, params Operand[] callArgs) + { + Operand[] args = new Operand[callArgs.Length + 1]; + + args[0] = address; + + Array.Copy(callArgs, 0, args, 1, callArgs.Length); + + if (returnType != OperandType.None) + { + return Add(Instruction.Call, Local(returnType), args); + } + else + { + return Add(Instruction.Call, default, args); + } + } + + public void Tailcall(Operand address, params Operand[] callArgs) + { + Operand[] args = new Operand[callArgs.Length + 1]; + + args[0] = address; + + Array.Copy(callArgs, 0, args, 1, callArgs.Length); + + Add(Instruction.Tailcall, default, args); + + _needsNewBlock = true; + } + + public Operand CompareAndSwap(Operand address, Operand expected, Operand desired) + { + return Add(Instruction.CompareAndSwap, Local(desired.Type), address, expected, desired); + } + + public Operand CompareAndSwap16(Operand address, Operand expected, Operand desired) + { + return Add(Instruction.CompareAndSwap16, Local(OperandType.I32), address, expected, desired); + } + + public Operand CompareAndSwap8(Operand address, Operand expected, Operand desired) + { + return Add(Instruction.CompareAndSwap8, Local(OperandType.I32), address, expected, desired); + } + + public Operand ConditionalSelect(Operand op1, Operand op2, Operand op3) + { + return Add(Instruction.ConditionalSelect, Local(op2.Type), op1, op2, op3); + } + + public Operand ConvertI64ToI32(Operand op1) + { + if (op1.Type != OperandType.I64) + { + throw new ArgumentException($"Invalid operand type \"{op1.Type}\"."); + } + + return Add(Instruction.ConvertI64ToI32, Local(OperandType.I32), op1); + } + + public Operand ConvertToFP(OperandType type, Operand op1) + { + return Add(Instruction.ConvertToFP, Local(type), op1); + } + + public Operand ConvertToFPUI(OperandType type, Operand op1) + { + return Add(Instruction.ConvertToFPUI, Local(type), op1); + } + + public Operand Copy(Operand op1) + { + return Add(Instruction.Copy, Local(op1.Type), op1); + } + + public Operand Copy(Operand dest, Operand op1) + { + if (dest.Kind != OperandKind.Register && + (dest.Kind != OperandKind.LocalVariable || dest.GetLocalNumber() == 0)) + { + throw new ArgumentException($"Destination operand must be a Register or a numbered LocalVariable."); + } + + return Add(Instruction.Copy, dest, op1); + } + + public Operand CountLeadingZeros(Operand op1) + { + return Add(Instruction.CountLeadingZeros, Local(op1.Type), op1); + } + + public Operand Divide(Operand op1, Operand op2) + { + return Add(Instruction.Divide, Local(op1.Type), op1, op2); + } + + public Operand DivideUI(Operand op1, Operand op2) + { + return Add(Instruction.DivideUI, Local(op1.Type), op1, op2); + } + + public Operand ICompare(Operand op1, Operand op2, Comparison comp) + { + return Add(Instruction.Compare, Local(OperandType.I32), op1, op2, Const((int)comp)); + } + + public Operand ICompareEqual(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.Equal); + } + + public Operand ICompareGreater(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.Greater); + } + + public Operand ICompareGreaterOrEqual(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.GreaterOrEqual); + } + + public Operand 
ICompareGreaterOrEqualUI(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.GreaterOrEqualUI); + } + + public Operand ICompareGreaterUI(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.GreaterUI); + } + + public Operand ICompareLess(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.Less); + } + + public Operand ICompareLessOrEqual(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.LessOrEqual); + } + + public Operand ICompareLessOrEqualUI(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.LessOrEqualUI); + } + + public Operand ICompareLessUI(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.LessUI); + } + + public Operand ICompareNotEqual(Operand op1, Operand op2) + { + return ICompare(op1, op2, Comparison.NotEqual); + } + + public Operand Load(OperandType type, Operand address) + { + return Add(Instruction.Load, Local(type), address); + } + + public Operand Load16(Operand address) + { + return Add(Instruction.Load16, Local(OperandType.I32), address); + } + + public Operand Load8(Operand address) + { + return Add(Instruction.Load8, Local(OperandType.I32), address); + } + + public Operand LoadArgument(OperandType type, int index) + { + return Add(Instruction.LoadArgument, Local(type), Const(index)); + } + + public void LoadFromContext() + { + _needsNewBlock = true; + + Add(Instruction.LoadFromContext); + } + + public void MemoryBarrier() + { + Add(Instruction.MemoryBarrier); + } + + public Operand Multiply(Operand op1, Operand op2) + { + return Add(Instruction.Multiply, Local(op1.Type), op1, op2); + } + + public Operand Multiply64HighSI(Operand op1, Operand op2) + { + return Add(Instruction.Multiply64HighSI, Local(OperandType.I64), op1, op2); + } + + public Operand Multiply64HighUI(Operand op1, Operand op2) + { + return Add(Instruction.Multiply64HighUI, Local(OperandType.I64), op1, op2); + } + + public Operand Negate(Operand op1) + { + return Add(Instruction.Negate, Local(op1.Type), op1); + } + + public void Return() + { + Add(Instruction.Return); + + _needsNewBlock = true; + } + + public void Return(Operand op1) + { + Add(Instruction.Return, default, op1); + + _needsNewBlock = true; + } + + public Operand RotateRight(Operand op1, Operand op2) + { + return Add(Instruction.RotateRight, Local(op1.Type), op1, op2); + } + + public Operand ShiftLeft(Operand op1, Operand op2) + { + return Add(Instruction.ShiftLeft, Local(op1.Type), op1, op2); + } + + public Operand ShiftRightSI(Operand op1, Operand op2) + { + return Add(Instruction.ShiftRightSI, Local(op1.Type), op1, op2); + } + + public Operand ShiftRightUI(Operand op1, Operand op2) + { + return Add(Instruction.ShiftRightUI, Local(op1.Type), op1, op2); + } + + public Operand SignExtend16(OperandType type, Operand op1) + { + return Add(Instruction.SignExtend16, Local(type), op1); + } + + public Operand SignExtend32(OperandType type, Operand op1) + { + return Add(Instruction.SignExtend32, Local(type), op1); + } + + public Operand SignExtend8(OperandType type, Operand op1) + { + return Add(Instruction.SignExtend8, Local(type), op1); + } + + public void Store(Operand address, Operand value) + { + Add(Instruction.Store, default, address, value); + } + + public void Store16(Operand address, Operand value) + { + Add(Instruction.Store16, default, address, value); + } + + public void Store8(Operand address, Operand value) + { + Add(Instruction.Store8, default, address, value); + } + + public void StoreToContext() + { + 
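// Counterpart of LoadFromContext above: the pseudo-instruction is emitted first and
// _needsNewBlock is set afterwards, so the context write ends its basic block
// (whereas LoadFromContext starts a fresh block before the read).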
Add(Instruction.StoreToContext); + + _needsNewBlock = true; + } + + public Operand Subtract(Operand op1, Operand op2) + { + return Add(Instruction.Subtract, Local(op1.Type), op1, op2); + } + + public Operand VectorCreateScalar(Operand value) + { + return Add(Instruction.VectorCreateScalar, Local(OperandType.V128), value); + } + + public Operand VectorExtract(OperandType type, Operand vector, int index) + { + return Add(Instruction.VectorExtract, Local(type), vector, Const(index)); + } + + public Operand VectorExtract16(Operand vector, int index) + { + return Add(Instruction.VectorExtract16, Local(OperandType.I32), vector, Const(index)); + } + + public Operand VectorExtract8(Operand vector, int index) + { + return Add(Instruction.VectorExtract8, Local(OperandType.I32), vector, Const(index)); + } + + public Operand VectorInsert(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorInsert16(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert16, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorInsert8(Operand vector, Operand value, int index) + { + return Add(Instruction.VectorInsert8, Local(OperandType.V128), vector, value, Const(index)); + } + + public Operand VectorOne() + { + return Add(Instruction.VectorOne, Local(OperandType.V128)); + } + + public Operand VectorZero() + { + return Add(Instruction.VectorZero, Local(OperandType.V128)); + } + + public Operand VectorZeroUpper64(Operand vector) + { + return Add(Instruction.VectorZeroUpper64, Local(OperandType.V128), vector); + } + + public Operand VectorZeroUpper96(Operand vector) + { + return Add(Instruction.VectorZeroUpper96, Local(OperandType.V128), vector); + } + + public Operand ZeroExtend16(OperandType type, Operand op1) + { + return Add(Instruction.ZeroExtend16, Local(type), op1); + } + + public Operand ZeroExtend32(OperandType type, Operand op1) + { + return Add(Instruction.ZeroExtend32, Local(type), op1); + } + + public Operand ZeroExtend8(OperandType type, Operand op1) + { + return Add(Instruction.ZeroExtend8, Local(type), op1); + } + + private void NewNextBlockIfNeeded() + { + if (_needsNewBlock) + { + NewNextBlock(); + } + } + + private Operand Add(Instruction inst, Operand dest = default) + { + NewNextBlockIfNeeded(); + + Operation operation = Operation.Factory.Operation(inst, dest); + + _irBlock.Operations.AddLast(operation); + + return dest; + } + + private Operand Add(Instruction inst, Operand dest, Operand[] sources) + { + NewNextBlockIfNeeded(); + + Operation operation = Operation.Factory.Operation(inst, dest, sources); + + _irBlock.Operations.AddLast(operation); + + return dest; + } + + private Operand Add(Instruction inst, Operand dest, Operand source0) + { + NewNextBlockIfNeeded(); + + Operation operation = Operation.Factory.Operation(inst, dest, source0); + + _irBlock.Operations.AddLast(operation); + + return dest; + } + + private Operand Add(Instruction inst, Operand dest, Operand source0, Operand source1) + { + NewNextBlockIfNeeded(); + + Operation operation = Operation.Factory.Operation(inst, dest, source0, source1); + + _irBlock.Operations.AddLast(operation); + + return dest; + } + + private Operand Add(Instruction inst, Operand dest, Operand source0, Operand source1, Operand source2) + { + NewNextBlockIfNeeded(); + + Operation operation = Operation.Factory.Operation(inst, dest, source0, source1, source2); + + 
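// As in the other Add overloads, the new operation is appended to the current block in program order.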
_irBlock.Operations.AddLast(operation); + + return dest; + } + + public Operand AddIntrinsic(Intrinsic intrin, params Operand[] args) + { + return Add(intrin, Local(OperandType.V128), args); + } + + public Operand AddIntrinsicInt(Intrinsic intrin, params Operand[] args) + { + return Add(intrin, Local(OperandType.I32), args); + } + + public Operand AddIntrinsicLong(Intrinsic intrin, params Operand[] args) + { + return Add(intrin, Local(OperandType.I64), args); + } + + public void AddIntrinsicNoRet(Intrinsic intrin, params Operand[] args) + { + Add(intrin, default, args); + } + + private Operand Add(Intrinsic intrin, Operand dest, params Operand[] sources) + { + NewNextBlockIfNeeded(); + + Operation operation = Operation.Factory.Operation(intrin, dest, sources); + + _irBlock.Operations.AddLast(operation); + + return dest; + } + + private void BranchToLabel(Operand label, bool uncond, BasicBlockFrequency nextFreq) + { + if (!_irLabels.TryGetValue(label, out BasicBlock branchBlock)) + { + branchBlock = new BasicBlock(); + + _irLabels.Add(label, branchBlock); + } + + if (uncond) + { + _irBlock.AddSuccessor(branchBlock); + } + else + { + // Defer registration of successor to _irBlock so that the order of successors is correct. + _ifBlock = branchBlock; + } + + _needsNewBlock = true; + _nextBlockFreq = nextFreq; + } + + public void MarkLabel(Operand label, BasicBlockFrequency nextFreq = default) + { + _nextBlockFreq = nextFreq; + + if (_irLabels.TryGetValue(label, out BasicBlock nextBlock)) + { + nextBlock.Index = _irBlocks.Count; + + _irBlocks.AddLast(nextBlock); + + NextBlock(nextBlock); + } + else + { + NewNextBlock(); + + _irLabels.Add(label, _irBlock); + } + } + + private void NewNextBlock() + { + BasicBlock block = new BasicBlock(_irBlocks.Count); + + _irBlocks.AddLast(block); + + NextBlock(block); + } + + private void NextBlock(BasicBlock nextBlock) + { + if (_irBlock?.SuccessorsCount == 0 && !EndsWithUnconditional(_irBlock)) + { + _irBlock.AddSuccessor(nextBlock); + + if (_ifBlock != null) + { + _irBlock.AddSuccessor(_ifBlock); + + _ifBlock = null; + } + } + + _irBlock = nextBlock; + _irBlock.Frequency = _nextBlockFreq; + + _needsNewBlock = false; + _nextBlockFreq = BasicBlockFrequency.Default; + } + + private static bool EndsWithUnconditional(BasicBlock block) + { + Operation last = block.Operations.Last; + + return last != default && + (last.Instruction == Instruction.Return || + last.Instruction == Instruction.Tailcall); + } + + public ControlFlowGraph GetControlFlowGraph() + { + return new ControlFlowGraph(_irBlocks.First, _irBlocks, _localsCount); + } + } +} diff --git a/src/ARMeilleure/Translation/GuestFunction.cs b/src/ARMeilleure/Translation/GuestFunction.cs new file mode 100644 index 00000000..ac131a0d --- /dev/null +++ b/src/ARMeilleure/Translation/GuestFunction.cs @@ -0,0 +1,6 @@ +using System; + +namespace ARMeilleure.Translation +{ + delegate ulong GuestFunction(IntPtr nativeContextPtr); +}
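Stepping back to the EmitterContext above: translation code drives it by allocating label operands, emitting instructions, and marking labels, then freezes the result into a ControlFlowGraph. A hypothetical usage sketch follows; the clamp logic is invented for illustration, Const() comes from the Operand.Factory static import shown at the top of EmitterContext.cs, and a Label() helper is assumed to exist on that same factory.

// Hypothetical: build IR for "return arg0 < 0 ? 0 : arg0".
EmitterContext context = new EmitterContext();

Operand arg0 = context.LoadArgument(OperandType.I64, 0);
Operand lblNonNegative = Label(); // Assumed Operand.Factory helper.

context.BranchIf(lblNonNegative, arg0, Const(0L), Comparison.GreaterOrEqual);
context.Return(Const(0L));

context.MarkLabel(lblNonNegative);
context.Return(arg0);

ControlFlowGraph cfg = context.GetControlFlowGraph();

Note how the conditional branch defers its taken-successor registration (the _ifBlock field) until the next block is created, so that fall-through and branch targets end up in a consistent successor order.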
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/IntervalTree.cs b/src/ARMeilleure/Translation/IntervalTree.cs new file mode 100644 index 00000000..9af01bea --- /dev/null +++ b/src/ARMeilleure/Translation/IntervalTree.cs @@ -0,0 +1,745 @@ +using System; +using System.Collections.Generic; + +namespace ARMeilleure.Translation +{ + /// <summary> + /// An Augmented Interval Tree based off of the "TreeDictionary"'s Red-Black Tree. Allows fast overlap checking of ranges. + /// </summary> + /// <typeparam name="K">Key</typeparam> + /// <typeparam name="V">Value</typeparam> + class IntervalTree<K, V> where K : IComparable<K> + { + private const int ArrayGrowthSize = 32; + + private const bool Black = true; + private const bool Red = false; + private IntervalTreeNode<K, V> _root = null; + private int _count = 0; + + public int Count => _count; + + #region Public Methods + + /// <summary> + /// Gets the values of the interval whose key is <paramref name="key"/>. + /// </summary> + /// <param name="key">Key of the node value to get</param> + /// <param name="value">Value with the given <paramref name="key"/></param> + /// <returns>True if the key is on the dictionary, false otherwise</returns> + public bool TryGet(K key, out V value) + { + IntervalTreeNode<K, V> node = GetNode(key); + + if (node == null) + { + value = default; + return false; + } + + value = node.Value; + return true; + } + + /// <summary> + /// Returns the start addresses of the intervals whose start and end keys overlap the given range. + /// </summary> + /// <param name="start">Start of the range</param> + /// <param name="end">End of the range</param> + /// <param name="overlaps">Overlaps array to place results in</param> + /// <param name="overlapCount">Index to start writing results into the array. Defaults to 0</param> + /// <returns>Number of intervals found</returns> + public int Get(K start, K end, ref K[] overlaps, int overlapCount = 0) + { + GetKeys(_root, start, end, ref overlaps, ref overlapCount); + + return overlapCount; + } + + /// <summary> + /// Adds a new interval into the tree whose start is <paramref name="start"/>, end is <paramref name="end"/> and value is <paramref name="value"/>. + /// </summary> + /// <param name="start">Start of the range to add</param> + /// <param name="end">End of the range to insert</param> + /// <param name="value">Value to add</param> + /// <param name="updateFactoryCallback">Optional factory used to create a new value if <paramref name="start"/> is already on the tree</param> + /// <exception cref="ArgumentNullException"><paramref name="value"/> is null</exception> + /// <returns>True if the value was added, false if the start key was already in the dictionary</returns> + public bool AddOrUpdate(K start, K end, V value, Func<K, V, V> updateFactoryCallback) + { + ArgumentNullException.ThrowIfNull(value); + + return BSTInsert(start, end, value, updateFactoryCallback, out IntervalTreeNode<K, V> node); + } + + /// <summary> + /// Gets an existing or adds a new interval into the tree whose start is <paramref name="start"/>, end is <paramref name="end"/> and value is <paramref name="value"/>. 
+ /// </summary> + /// <param name="start">Start of the range to add</param> + /// <param name="end">End of the range to insert</param> + /// <param name="value">Value to add</param> + /// <exception cref="ArgumentNullException"><paramref name="value"/> is null</exception> + /// <returns><paramref name="value"/> if <paramref name="start"/> is not yet on the tree, or the existing value otherwise</returns> + public V GetOrAdd(K start, K end, V value) + { + ArgumentNullException.ThrowIfNull(value); + + BSTInsert(start, end, value, null, out IntervalTreeNode<K, V> node); + return node.Value; + } + + /// <summary> + /// Removes a value from the tree, searching for it with <paramref name="key"/>. + /// </summary> + /// <param name="key">Key of the node to remove</param> + /// <returns>Number of deleted values</returns> + public int Remove(K key) + { + int removed = Delete(key); + + _count -= removed; + + return removed; + } + + /// <summary> + /// Adds all the nodes in the dictionary into <paramref name="list"/>. + /// </summary> + /// <returns>A list of all values sorted by Key Order</returns> + public List<V> AsList() + { + List<V> list = new List<V>(); + + AddToList(_root, list); + + return list; + } + + #endregion + + #region Private Methods (BST) + + /// <summary> + /// Adds all values that are children of or contained within <paramref name="node"/> into <paramref name="list"/>, in Key Order. + /// </summary> + /// <param name="node">The node to search for values within</param> + /// <param name="list">The list to add values to</param> + private void AddToList(IntervalTreeNode<K, V> node, List<V> list) + { + if (node == null) + { + return; + } + + AddToList(node.Left, list); + + list.Add(node.Value); + + AddToList(node.Right, list); + } + + /// <summary> + /// Retrieve the node reference whose key is <paramref name="key"/>, or null if no such node exists. + /// </summary> + /// <param name="key">Key of the node to get</param> + /// <exception cref="ArgumentNullException"><paramref name="key"/> is null</exception> + /// <returns>Node reference in the tree</returns> + private IntervalTreeNode<K, V> GetNode(K key) + { + ArgumentNullException.ThrowIfNull(key); + + IntervalTreeNode<K, V> node = _root; + while (node != null) + { + int cmp = key.CompareTo(node.Start); + if (cmp < 0) + { + node = node.Left; + } + else if (cmp > 0) + { + node = node.Right; + } + else + { + return node; + } + } + return null; + } + + /// <summary> + /// Retrieve all keys that overlap the given start and end keys. + /// </summary> + /// <param name="start">Start of the range</param> + /// <param name="end">End of the range</param> + /// <param name="overlaps">Overlaps array to place results in</param> + /// <param name="overlapCount">Overlaps count to update</param> + private void GetKeys(IntervalTreeNode<K, V> node, K start, K end, ref K[] overlaps, ref int overlapCount) + { + if (node == null || start.CompareTo(node.Max) >= 0) + { + return; + } + + GetKeys(node.Left, start, end, ref overlaps, ref overlapCount); + + bool endsOnRight = end.CompareTo(node.Start) > 0; + if (endsOnRight) + { + if (start.CompareTo(node.End) < 0) + { + if (overlaps.Length >= overlapCount) + { + Array.Resize(ref overlaps, overlapCount + ArrayGrowthSize); + } + + overlaps[overlapCount++] = node.Start; + } + + GetKeys(node.Right, start, end, ref overlaps, ref overlapCount); + } + } + + /// <summary> + /// Propagate an increase in max value starting at the given node, heading up the tree. 
+ /// This should only be called if the max increases - not for rebalancing or removals. + /// </summary> + /// <param name="node">The node to start propagating from</param> + private void PropagateIncrease(IntervalTreeNode<K, V> node) + { + K max = node.Max; + IntervalTreeNode<K, V> ptr = node; + + while ((ptr = ptr.Parent) != null) + { + if (max.CompareTo(ptr.Max) > 0) + { + ptr.Max = max; + } + else + { + break; + } + } + } + + /// <summary> + /// Propagate recalculating max value starting at the given node, heading up the tree. + /// This fully recalculates the max value from all children when there is potential for it to decrease. + /// </summary> + /// <param name="node">The node to start propagating from</param> + private void PropagateFull(IntervalTreeNode<K, V> node) + { + IntervalTreeNode<K, V> ptr = node; + + do + { + K max = ptr.End; + + if (ptr.Left != null && ptr.Left.Max.CompareTo(max) > 0) + { + max = ptr.Left.Max; + } + + if (ptr.Right != null && ptr.Right.Max.CompareTo(max) > 0) + { + max = ptr.Right.Max; + } + + ptr.Max = max; + } while ((ptr = ptr.Parent) != null); + } + + /// <summary> + /// Insertion Mechanism for the interval tree. Similar to a BST insert, with the start of the range as the key. + /// Iterates the tree starting from the root and inserts a new node where all children in the left subtree are less than <paramref name="start"/>, and all children in the right subtree are greater than <paramref name="start"/>. + /// Each node can contain multiple values, and has an end address which is the maximum of all those values. + /// Post insertion, the "max" value of the node and all parents are updated. + /// </summary> + /// <param name="start">Start of the range to insert</param> + /// <param name="end">End of the range to insert</param> + /// <param name="value">Value to insert</param> + /// <param name="updateFactoryCallback">Optional factory used to create a new value if <paramref name="start"/> is already on the tree</param> + /// <param name="outNode">Node that was inserted or modified</param> + /// <returns>True if <paramref name="start"/> was not yet on the tree, false otherwise</returns> + private bool BSTInsert(K start, K end, V value, Func<K, V, V> updateFactoryCallback, out IntervalTreeNode<K, V> outNode) + { + IntervalTreeNode<K, V> parent = null; + IntervalTreeNode<K, V> node = _root; + + while (node != null) + { + parent = node; + int cmp = start.CompareTo(node.Start); + if (cmp < 0) + { + node = node.Left; + } + else if (cmp > 0) + { + node = node.Right; + } + else + { + outNode = node; + + if (updateFactoryCallback != null) + { + // Replace + node.Value = updateFactoryCallback(start, node.Value); + + int endCmp = end.CompareTo(node.End); + + if (endCmp > 0) + { + node.End = end; + if (end.CompareTo(node.Max) > 0) + { + node.Max = end; + PropagateIncrease(node); + RestoreBalanceAfterInsertion(node); + } + } + else if (endCmp < 0) + { + node.End = end; + PropagateFull(node); + } + } + + return false; + } + } + IntervalTreeNode<K, V> newNode = new IntervalTreeNode<K, V>(start, end, value, parent); + if (newNode.Parent == null) + { + _root = newNode; + } + else if (start.CompareTo(parent.Start) < 0) + { + parent.Left = newNode; + } + else + { + parent.Right = newNode; + } + + PropagateIncrease(newNode); + _count++; + RestoreBalanceAfterInsertion(newNode); + outNode = newNode; + return true; + } + + /// <summary> + /// Removes the value from the dictionary after searching for it with <paramref name="key"/>. 
+ /// </summary> + /// <param name="key">Key to search for</param> + /// <returns>Number of deleted values</returns> + private int Delete(K key) + { + IntervalTreeNode<K, V> nodeToDelete = GetNode(key); + + if (nodeToDelete == null) + { + return 0; + } + + IntervalTreeNode<K, V> replacementNode; + + if (LeftOf(nodeToDelete) == null || RightOf(nodeToDelete) == null) + { + replacementNode = nodeToDelete; + } + else + { + replacementNode = PredecessorOf(nodeToDelete); + } + + IntervalTreeNode<K, V> tmp = LeftOf(replacementNode) ?? RightOf(replacementNode); + + if (tmp != null) + { + tmp.Parent = ParentOf(replacementNode); + } + + if (ParentOf(replacementNode) == null) + { + _root = tmp; + } + else if (replacementNode == LeftOf(ParentOf(replacementNode))) + { + ParentOf(replacementNode).Left = tmp; + } + else + { + ParentOf(replacementNode).Right = tmp; + } + + if (replacementNode != nodeToDelete) + { + nodeToDelete.Start = replacementNode.Start; + nodeToDelete.Value = replacementNode.Value; + nodeToDelete.End = replacementNode.End; + nodeToDelete.Max = replacementNode.Max; + } + + PropagateFull(replacementNode); + + if (tmp != null && ColorOf(replacementNode) == Black) + { + RestoreBalanceAfterRemoval(tmp); + } + + return 1; + } + + /// <summary> + /// Returns the node with the largest key where <paramref name="node"/> is considered the root node. + /// </summary> + /// <param name="node">Root Node</param> + /// <returns>Node with the maximum key in the tree of <paramref name="node"/></returns> + private static IntervalTreeNode<K, V> Maximum(IntervalTreeNode<K, V> node) + { + IntervalTreeNode<K, V> tmp = node; + while (tmp.Right != null) + { + tmp = tmp.Right; + } + + return tmp; + } + + /// <summary> + /// Finds the node whose key is immediately less than <paramref name="node"/>. 
+ /// </summary> + /// <param name="node">Node to find the predecessor of</param> + /// <returns>Predecessor of <paramref name="node"/></returns> + private static IntervalTreeNode<K, V> PredecessorOf(IntervalTreeNode<K, V> node) + { + if (node.Left != null) + { + return Maximum(node.Left); + } + IntervalTreeNode<K, V> parent = node.Parent; + while (parent != null && node == parent.Left) + { + node = parent; + parent = parent.Parent; + } + return parent; + } + + #endregion + + #region Private Methods (RBL) + + private void RestoreBalanceAfterRemoval(IntervalTreeNode<K, V> balanceNode) + { + IntervalTreeNode<K, V> ptr = balanceNode; + + while (ptr != _root && ColorOf(ptr) == Black) + { + if (ptr == LeftOf(ParentOf(ptr))) + { + IntervalTreeNode<K, V> sibling = RightOf(ParentOf(ptr)); + + if (ColorOf(sibling) == Red) + { + SetColor(sibling, Black); + SetColor(ParentOf(ptr), Red); + RotateLeft(ParentOf(ptr)); + sibling = RightOf(ParentOf(ptr)); + } + if (ColorOf(LeftOf(sibling)) == Black && ColorOf(RightOf(sibling)) == Black) + { + SetColor(sibling, Red); + ptr = ParentOf(ptr); + } + else + { + if (ColorOf(RightOf(sibling)) == Black) + { + SetColor(LeftOf(sibling), Black); + SetColor(sibling, Red); + RotateRight(sibling); + sibling = RightOf(ParentOf(ptr)); + } + SetColor(sibling, ColorOf(ParentOf(ptr))); + SetColor(ParentOf(ptr), Black); + SetColor(RightOf(sibling), Black); + RotateLeft(ParentOf(ptr)); + ptr = _root; + } + } + else + { + IntervalTreeNode<K, V> sibling = LeftOf(ParentOf(ptr)); + + if (ColorOf(sibling) == Red) + { + SetColor(sibling, Black); + SetColor(ParentOf(ptr), Red); + RotateRight(ParentOf(ptr)); + sibling = LeftOf(ParentOf(ptr)); + } + if (ColorOf(RightOf(sibling)) == Black && ColorOf(LeftOf(sibling)) == Black) + { + SetColor(sibling, Red); + ptr = ParentOf(ptr); + } + else + { + if (ColorOf(LeftOf(sibling)) == Black) + { + SetColor(RightOf(sibling), Black); + SetColor(sibling, Red); + RotateLeft(sibling); + sibling = LeftOf(ParentOf(ptr)); + } + SetColor(sibling, ColorOf(ParentOf(ptr))); + SetColor(ParentOf(ptr), Black); + SetColor(LeftOf(sibling), Black); + RotateRight(ParentOf(ptr)); + ptr = _root; + } + } + } + SetColor(ptr, Black); + } + + private void RestoreBalanceAfterInsertion(IntervalTreeNode<K, V> balanceNode) + { + SetColor(balanceNode, Red); + while (balanceNode != null && balanceNode != _root && ColorOf(ParentOf(balanceNode)) == Red) + { + if (ParentOf(balanceNode) == LeftOf(ParentOf(ParentOf(balanceNode)))) + { + IntervalTreeNode<K, V> sibling = RightOf(ParentOf(ParentOf(balanceNode))); + + if (ColorOf(sibling) == Red) + { + SetColor(ParentOf(balanceNode), Black); + SetColor(sibling, Black); + SetColor(ParentOf(ParentOf(balanceNode)), Red); + balanceNode = ParentOf(ParentOf(balanceNode)); + } + else + { + if (balanceNode == RightOf(ParentOf(balanceNode))) + { + balanceNode = ParentOf(balanceNode); + RotateLeft(balanceNode); + } + SetColor(ParentOf(balanceNode), Black); + SetColor(ParentOf(ParentOf(balanceNode)), Red); + RotateRight(ParentOf(ParentOf(balanceNode))); + } + } + else + { + IntervalTreeNode<K, V> sibling = LeftOf(ParentOf(ParentOf(balanceNode))); + + if (ColorOf(sibling) == Red) + { + SetColor(ParentOf(balanceNode), Black); + SetColor(sibling, Black); + SetColor(ParentOf(ParentOf(balanceNode)), Red); + balanceNode = ParentOf(ParentOf(balanceNode)); + } + else + { + if (balanceNode == LeftOf(ParentOf(balanceNode))) + { + balanceNode = ParentOf(balanceNode); + RotateRight(balanceNode); + } + SetColor(ParentOf(balanceNode), Black); + 
SetColor(ParentOf(ParentOf(balanceNode)), Red); + RotateLeft(ParentOf(ParentOf(balanceNode))); + } + } + } + SetColor(_root, Black); + } + + private void RotateLeft(IntervalTreeNode<K, V> node) + { + if (node != null) + { + IntervalTreeNode<K, V> right = RightOf(node); + node.Right = LeftOf(right); + if (node.Right != null) + { + node.Right.Parent = node; + } + IntervalTreeNode<K, V> nodeParent = ParentOf(node); + right.Parent = nodeParent; + if (nodeParent == null) + { + _root = right; + } + else if (node == LeftOf(nodeParent)) + { + nodeParent.Left = right; + } + else + { + nodeParent.Right = right; + } + right.Left = node; + node.Parent = right; + + PropagateFull(node); + } + } + + private void RotateRight(IntervalTreeNode<K, V> node) + { + if (node != null) + { + IntervalTreeNode<K, V> left = LeftOf(node); + node.Left = RightOf(left); + if (node.Left != null) + { + node.Left.Parent = node; + } + IntervalTreeNode<K, V> nodeParent = ParentOf(node); + left.Parent = nodeParent; + if (nodeParent == null) + { + _root = left; + } + else if (node == RightOf(nodeParent)) + { + nodeParent.Right = left; + } + else + { + nodeParent.Left = left; + } + left.Right = node; + node.Parent = left; + + PropagateFull(node); + } + } + + #endregion + + #region Safety-Methods + + // These methods save memory by allowing us to forego sentinel nil nodes, as well as serve as protection against NullReferenceExceptions. + + /// <summary> + /// Returns the color of <paramref name="node"/>, or Black if it is null. + /// </summary> + /// <param name="node">Node</param> + /// <returns>The boolean color of <paramref name="node"/>, or black if null</returns> + private static bool ColorOf(IntervalTreeNode<K, V> node) + { + return node == null || node.Color; + } + + /// <summary> + /// Sets the color of <paramref name="node"/> node to <paramref name="color"/>. + /// <br></br> + /// This method does nothing if <paramref name="node"/> is null. + /// </summary> + /// <param name="node">Node to set the color of</param> + /// <param name="color">Color (Boolean)</param> + private static void SetColor(IntervalTreeNode<K, V> node, bool color) + { + if (node != null) + { + node.Color = color; + } + } + + /// <summary> + /// This method returns the left node of <paramref name="node"/>, or null if <paramref name="node"/> is null. + /// </summary> + /// <param name="node">Node to retrieve the left child from</param> + /// <returns>Left child of <paramref name="node"/></returns> + private static IntervalTreeNode<K, V> LeftOf(IntervalTreeNode<K, V> node) + { + return node?.Left; + } + + /// <summary> + /// This method returns the right node of <paramref name="node"/>, or null if <paramref name="node"/> is null. + /// </summary> + /// <param name="node">Node to retrieve the right child from</param> + /// <returns>Right child of <paramref name="node"/></returns> + private static IntervalTreeNode<K, V> RightOf(IntervalTreeNode<K, V> node) + { + return node?.Right; + } + + /// <summary> + /// Returns the parent node of <paramref name="node"/>, or null if <paramref name="node"/> is null. 
+ /// </summary> + /// <param name="node">Node to retrieve the parent from</param> + /// <returns>Parent of <paramref name="node"/></returns> + private static IntervalTreeNode<K, V> ParentOf(IntervalTreeNode<K, V> node) + { + return node?.Parent; + } + + #endregion + + public bool ContainsKey(K key) + { + return GetNode(key) != null; + } + + public void Clear() + { + _root = null; + _count = 0; + } + } + + /// <summary> + /// Represents a node in the IntervalTree which contains start and end keys of type K, and a value of generic type V. + /// </summary> + /// <typeparam name="K">Key type of the node</typeparam> + /// <typeparam name="V">Value type of the node</typeparam> + class IntervalTreeNode<K, V> + { + public bool Color = true; + public IntervalTreeNode<K, V> Left = null; + public IntervalTreeNode<K, V> Right = null; + public IntervalTreeNode<K, V> Parent = null; + + /// <summary> + /// The start of the range. + /// </summary> + public K Start; + + /// <summary> + /// The end of the range. + /// </summary> + public K End; + + /// <summary> + /// The maximum end value of this node and all its children. + /// </summary> + public K Max; + + /// <summary> + /// Value stored on this node. + /// </summary> + public V Value; + + public IntervalTreeNode(K start, K end, V value, IntervalTreeNode<K, V> parent) + { + Start = start; + End = end; + Max = end; + Value = value; + Parent = parent; + } + } +} diff --git a/src/ARMeilleure/Translation/PTC/EncodingCache.cs b/src/ARMeilleure/Translation/PTC/EncodingCache.cs new file mode 100644 index 00000000..90d40c47 --- /dev/null +++ b/src/ARMeilleure/Translation/PTC/EncodingCache.cs @@ -0,0 +1,9 @@ +using System.Text; + +namespace ARMeilleure.Translation.PTC +{ + static class EncodingCache + { + public static readonly Encoding UTF8NoBOM = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false, throwOnInvalidBytes: true); + } +}
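Returning to the IntervalTree above, a hypothetical usage sketch (inside a method of the same assembly, with using System; in scope; the addresses and values are invented). Get reports the start keys of every stored interval overlapping the query range, in ascending key order:

// Hypothetical usage of IntervalTree<K, V> (illustrative values).
var tree = new IntervalTree<ulong, string>();

tree.AddOrUpdate(0x1000, 0x2000, "funcA", updateFactoryCallback: null); // [0x1000, 0x2000)
tree.AddOrUpdate(0x1800, 0x2800, "funcB", updateFactoryCallback: null); // [0x1800, 0x2800)

ulong[] overlaps = Array.Empty<ulong>();
int count = tree.Get(0x1F00, 0x2100, ref overlaps);

// Both intervals overlap [0x1F00, 0x2100), so count == 2 and
// overlaps[0..count] holds their start keys in key order.
for (int i = 0; i < count; i++)
{
    if (tree.TryGet(overlaps[i], out string value))
    {
        Console.WriteLine($"0x{overlaps[i]:X}: {value}");
    }
}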
\ No newline at end of file
diff --git a/src/ARMeilleure/Translation/PTC/IPtcLoadState.cs b/src/ARMeilleure/Translation/PTC/IPtcLoadState.cs
new file mode 100644
index 00000000..1b11ac0b
--- /dev/null
+++ b/src/ARMeilleure/Translation/PTC/IPtcLoadState.cs
@@ -0,0 +1,10 @@
+using System;
+
+namespace ARMeilleure.Translation.PTC
+{
+    public interface IPtcLoadState
+    {
+        event Action<PtcLoadingState, int, int> PtcStateChanged;
+        void Continue();
+    }
+}
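A frontend consumes this interface to drive a PTC loading indicator. A hypothetical subscriber is sketched below; the parameter ordering (state, then functions translated so far, then total) is inferred from the _translateCount/_translateTotalCount fields in Ptc further down, and the effect of Continue() is assumed, not shown in this section.

static void AttachProgressReporting(IPtcLoadState loadState)
{
    loadState.PtcStateChanged += (state, current, total) =>
    {
        // Assumed ordering: loading state, functions translated so far, total to translate.
        Console.WriteLine($"PTC {state}: {current}/{total}");
    };

    loadState.Continue(); // Assumption: signals the load/translation to proceed.
}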
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/PTC/Ptc.cs b/src/ARMeilleure/Translation/PTC/Ptc.cs new file mode 100644 index 00000000..ea4e715b --- /dev/null +++ b/src/ARMeilleure/Translation/PTC/Ptc.cs @@ -0,0 +1,1131 @@ +using ARMeilleure.CodeGen; +using ARMeilleure.CodeGen.Linking; +using ARMeilleure.CodeGen.Unwinding; +using ARMeilleure.Common; +using ARMeilleure.Memory; +using Ryujinx.Common; +using Ryujinx.Common.Configuration; +using Ryujinx.Common.Logging; +using Ryujinx.Common.Memory; +using System; +using System.Buffers.Binary; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.IO.Compression; +using System.Runtime; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Threading; + +using static ARMeilleure.Translation.PTC.PtcFormatter; + +namespace ARMeilleure.Translation.PTC +{ + using Arm64HardwareCapabilities = ARMeilleure.CodeGen.Arm64.HardwareCapabilities; + using X86HardwareCapabilities = ARMeilleure.CodeGen.X86.HardwareCapabilities; + + class Ptc : IPtcLoadState + { + private const string OuterHeaderMagicString = "PTCohd\0\0"; + private const string InnerHeaderMagicString = "PTCihd\0\0"; + + private const uint InternalVersion = 4661; //! To be incremented manually for each change to the ARMeilleure project. + + private const string ActualDir = "0"; + private const string BackupDir = "1"; + + private const string TitleIdTextDefault = "0000000000000000"; + private const string DisplayVersionDefault = "0"; + + public static readonly Symbol PageTableSymbol = new(SymbolType.Special, 1); + public static readonly Symbol CountTableSymbol = new(SymbolType.Special, 2); + public static readonly Symbol DispatchStubSymbol = new(SymbolType.Special, 3); + + private const byte FillingByte = 0x00; + private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest; + + public PtcProfiler Profiler { get; } + + // Carriers. + private MemoryStream _infosStream; + private List<byte[]> _codesList; + private MemoryStream _relocsStream; + private MemoryStream _unwindInfosStream; + + private readonly ulong _outerHeaderMagic; + private readonly ulong _innerHeaderMagic; + + private readonly ManualResetEvent _waitEvent; + + private readonly object _lock; + + private bool _disposed; + + public string TitleIdText { get; private set; } + public string DisplayVersion { get; private set; } + + private MemoryManagerType _memoryMode; + + public string CachePathActual { get; private set; } + public string CachePathBackup { get; private set; } + + public PtcState State { get; private set; } + + // Progress reporting helpers. 
+ private volatile int _translateCount; + private volatile int _translateTotalCount; + public event Action<PtcLoadingState, int, int> PtcStateChanged; + + public Ptc() + { + Profiler = new PtcProfiler(this); + + InitializeCarriers(); + + _outerHeaderMagic = BinaryPrimitives.ReadUInt64LittleEndian(EncodingCache.UTF8NoBOM.GetBytes(OuterHeaderMagicString).AsSpan()); + _innerHeaderMagic = BinaryPrimitives.ReadUInt64LittleEndian(EncodingCache.UTF8NoBOM.GetBytes(InnerHeaderMagicString).AsSpan()); + + _waitEvent = new ManualResetEvent(true); + + _lock = new object(); + + _disposed = false; + + TitleIdText = TitleIdTextDefault; + DisplayVersion = DisplayVersionDefault; + + CachePathActual = string.Empty; + CachePathBackup = string.Empty; + + Disable(); + } + + public void Initialize(string titleIdText, string displayVersion, bool enabled, MemoryManagerType memoryMode) + { + Wait(); + + Profiler.Wait(); + Profiler.ClearEntries(); + + Logger.Info?.Print(LogClass.Ptc, $"Initializing Profiled Persistent Translation Cache (enabled: {enabled})."); + + if (!enabled || string.IsNullOrEmpty(titleIdText) || titleIdText == TitleIdTextDefault) + { + TitleIdText = TitleIdTextDefault; + DisplayVersion = DisplayVersionDefault; + + CachePathActual = string.Empty; + CachePathBackup = string.Empty; + + Disable(); + + return; + } + + TitleIdText = titleIdText; + DisplayVersion = !string.IsNullOrEmpty(displayVersion) ? displayVersion : DisplayVersionDefault; + _memoryMode = memoryMode; + + string workPathActual = Path.Combine(AppDataManager.GamesDirPath, TitleIdText, "cache", "cpu", ActualDir); + string workPathBackup = Path.Combine(AppDataManager.GamesDirPath, TitleIdText, "cache", "cpu", BackupDir); + + if (!Directory.Exists(workPathActual)) + { + Directory.CreateDirectory(workPathActual); + } + + if (!Directory.Exists(workPathBackup)) + { + Directory.CreateDirectory(workPathBackup); + } + + CachePathActual = Path.Combine(workPathActual, DisplayVersion); + CachePathBackup = Path.Combine(workPathBackup, DisplayVersion); + + PreLoad(); + Profiler.PreLoad(); + + Enable(); + } + + private void InitializeCarriers() + { + _infosStream = MemoryStreamManager.Shared.GetStream(); + _codesList = new List<byte[]>(); + _relocsStream = MemoryStreamManager.Shared.GetStream(); + _unwindInfosStream = MemoryStreamManager.Shared.GetStream(); + } + + private void DisposeCarriers() + { + _infosStream.Dispose(); + _codesList.Clear(); + _relocsStream.Dispose(); + _unwindInfosStream.Dispose(); + } + + private bool AreCarriersEmpty() + { + return _infosStream.Length == 0L && _codesList.Count == 0 && _relocsStream.Length == 0L && _unwindInfosStream.Length == 0L; + } + + private void ResetCarriersIfNeeded() + { + if (AreCarriersEmpty()) + { + return; + } + + DisposeCarriers(); + + InitializeCarriers(); + } + + private void PreLoad() + { + string fileNameActual = $"{CachePathActual}.cache"; + string fileNameBackup = $"{CachePathBackup}.cache"; + + FileInfo fileInfoActual = new FileInfo(fileNameActual); + FileInfo fileInfoBackup = new FileInfo(fileNameBackup); + + if (fileInfoActual.Exists && fileInfoActual.Length != 0L) + { + if (!Load(fileNameActual, false)) + { + if (fileInfoBackup.Exists && fileInfoBackup.Length != 0L) + { + Load(fileNameBackup, true); + } + } + } + else if (fileInfoBackup.Exists && fileInfoBackup.Length != 0L) + { + Load(fileNameBackup, true); + } + } + + private unsafe bool Load(string fileName, bool isBackup) + { + using (FileStream compressedStream = new(fileName, FileMode.Open)) + using (DeflateStream deflateStream = 
new(compressedStream, CompressionMode.Decompress, true)) + { + OuterHeader outerHeader = DeserializeStructure<OuterHeader>(compressedStream); + + if (!outerHeader.IsHeaderValid()) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.Magic != _outerHeaderMagic) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.CacheFileVersion != InternalVersion) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.Endianness != GetEndianness()) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.FeatureInfo != GetFeatureInfo()) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.MemoryManagerMode != GetMemoryManagerMode()) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.OSPlatform != GetOSPlatform()) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.Architecture != (uint)RuntimeInformation.ProcessArchitecture) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + IntPtr intPtr = IntPtr.Zero; + + try + { + intPtr = Marshal.AllocHGlobal(new IntPtr(outerHeader.UncompressedStreamSize)); + + using (UnmanagedMemoryStream stream = new((byte*)intPtr.ToPointer(), outerHeader.UncompressedStreamSize, outerHeader.UncompressedStreamSize, FileAccess.ReadWrite)) + { + try + { + deflateStream.CopyTo(stream); + } + catch + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + Debug.Assert(stream.Position == stream.Length); + + stream.Seek(0L, SeekOrigin.Begin); + + InnerHeader innerHeader = DeserializeStructure<InnerHeader>(stream); + + if (!innerHeader.IsHeaderValid()) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (innerHeader.Magic != _innerHeaderMagic) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + ReadOnlySpan<byte> infosBytes = new(stream.PositionPointer, innerHeader.InfosLength); + stream.Seek(innerHeader.InfosLength, SeekOrigin.Current); + + Hash128 infosHash = XXHash128.ComputeHash(infosBytes); + + if (innerHeader.InfosHash != infosHash) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + ReadOnlySpan<byte> codesBytes = (int)innerHeader.CodesLength > 0 ? 
new(stream.PositionPointer, (int)innerHeader.CodesLength) : ReadOnlySpan<byte>.Empty; + stream.Seek(innerHeader.CodesLength, SeekOrigin.Current); + + Hash128 codesHash = XXHash128.ComputeHash(codesBytes); + + if (innerHeader.CodesHash != codesHash) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + ReadOnlySpan<byte> relocsBytes = new(stream.PositionPointer, innerHeader.RelocsLength); + stream.Seek(innerHeader.RelocsLength, SeekOrigin.Current); + + Hash128 relocsHash = XXHash128.ComputeHash(relocsBytes); + + if (innerHeader.RelocsHash != relocsHash) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + ReadOnlySpan<byte> unwindInfosBytes = new(stream.PositionPointer, innerHeader.UnwindInfosLength); + stream.Seek(innerHeader.UnwindInfosLength, SeekOrigin.Current); + + Hash128 unwindInfosHash = XXHash128.ComputeHash(unwindInfosBytes); + + if (innerHeader.UnwindInfosHash != unwindInfosHash) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + Debug.Assert(stream.Position == stream.Length); + + stream.Seek((long)Unsafe.SizeOf<InnerHeader>(), SeekOrigin.Begin); + + _infosStream.Write(infosBytes); + stream.Seek(innerHeader.InfosLength, SeekOrigin.Current); + + _codesList.ReadFrom(stream); + + _relocsStream.Write(relocsBytes); + stream.Seek(innerHeader.RelocsLength, SeekOrigin.Current); + + _unwindInfosStream.Write(unwindInfosBytes); + stream.Seek(innerHeader.UnwindInfosLength, SeekOrigin.Current); + + Debug.Assert(stream.Position == stream.Length); + } + } + finally + { + if (intPtr != IntPtr.Zero) + { + Marshal.FreeHGlobal(intPtr); + } + } + } + + long fileSize = new FileInfo(fileName).Length; + + Logger.Info?.Print(LogClass.Ptc, $"{(isBackup ? "Loaded Backup Translation Cache" : "Loaded Translation Cache")} (size: {fileSize} bytes, translated functions: {GetEntriesCount()})."); + + return true; + } + + private void InvalidateCompressedStream(FileStream compressedStream) + { + compressedStream.SetLength(0L); + } + + private void PreSave() + { + _waitEvent.Reset(); + + try + { + string fileNameActual = $"{CachePathActual}.cache"; + string fileNameBackup = $"{CachePathBackup}.cache"; + + FileInfo fileInfoActual = new FileInfo(fileNameActual); + + if (fileInfoActual.Exists && fileInfoActual.Length != 0L) + { + File.Copy(fileNameActual, fileNameBackup, true); + } + + Save(fileNameActual); + } + finally + { + ResetCarriersIfNeeded(); + + GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce; + } + + _waitEvent.Set(); + } + + private unsafe void Save(string fileName) + { + int translatedFuncsCount; + + InnerHeader innerHeader = new InnerHeader(); + + innerHeader.Magic = _innerHeaderMagic; + + innerHeader.InfosLength = (int)_infosStream.Length; + innerHeader.CodesLength = _codesList.Length(); + innerHeader.RelocsLength = (int)_relocsStream.Length; + innerHeader.UnwindInfosLength = (int)_unwindInfosStream.Length; + + OuterHeader outerHeader = new OuterHeader(); + + outerHeader.Magic = _outerHeaderMagic; + + outerHeader.CacheFileVersion = InternalVersion; + outerHeader.Endianness = GetEndianness(); + outerHeader.FeatureInfo = GetFeatureInfo(); + outerHeader.MemoryManagerMode = GetMemoryManagerMode(); + outerHeader.OSPlatform = GetOSPlatform(); + outerHeader.Architecture = (uint)RuntimeInformation.ProcessArchitecture; + + outerHeader.UncompressedStreamSize = + (long)Unsafe.SizeOf<InnerHeader>() + + innerHeader.InfosLength + + innerHeader.CodesLength + + innerHeader.RelocsLength + + 
innerHeader.UnwindInfosLength; + + outerHeader.SetHeaderHash(); + + IntPtr intPtr = IntPtr.Zero; + + try + { + intPtr = Marshal.AllocHGlobal(new IntPtr(outerHeader.UncompressedStreamSize)); + + using (UnmanagedMemoryStream stream = new((byte*)intPtr.ToPointer(), outerHeader.UncompressedStreamSize, outerHeader.UncompressedStreamSize, FileAccess.ReadWrite)) + { + stream.Seek((long)Unsafe.SizeOf<InnerHeader>(), SeekOrigin.Begin); + + ReadOnlySpan<byte> infosBytes = new(stream.PositionPointer, innerHeader.InfosLength); + _infosStream.WriteTo(stream); + + ReadOnlySpan<byte> codesBytes = (int)innerHeader.CodesLength > 0 ? new(stream.PositionPointer, (int)innerHeader.CodesLength) : ReadOnlySpan<byte>.Empty; + _codesList.WriteTo(stream); + + ReadOnlySpan<byte> relocsBytes = new(stream.PositionPointer, innerHeader.RelocsLength); + _relocsStream.WriteTo(stream); + + ReadOnlySpan<byte> unwindInfosBytes = new(stream.PositionPointer, innerHeader.UnwindInfosLength); + _unwindInfosStream.WriteTo(stream); + + Debug.Assert(stream.Position == stream.Length); + + innerHeader.InfosHash = XXHash128.ComputeHash(infosBytes); + innerHeader.CodesHash = XXHash128.ComputeHash(codesBytes); + innerHeader.RelocsHash = XXHash128.ComputeHash(relocsBytes); + innerHeader.UnwindInfosHash = XXHash128.ComputeHash(unwindInfosBytes); + + innerHeader.SetHeaderHash(); + + stream.Seek(0L, SeekOrigin.Begin); + SerializeStructure(stream, innerHeader); + + translatedFuncsCount = GetEntriesCount(); + + ResetCarriersIfNeeded(); + + using (FileStream compressedStream = new(fileName, FileMode.OpenOrCreate)) + using (DeflateStream deflateStream = new(compressedStream, SaveCompressionLevel, true)) + { + try + { + SerializeStructure(compressedStream, outerHeader); + + stream.Seek(0L, SeekOrigin.Begin); + stream.CopyTo(deflateStream); + } + catch + { + compressedStream.Position = 0L; + } + + if (compressedStream.Position < compressedStream.Length) + { + compressedStream.SetLength(compressedStream.Position); + } + } + } + } + finally + { + if (intPtr != IntPtr.Zero) + { + Marshal.FreeHGlobal(intPtr); + } + } + + long fileSize = new FileInfo(fileName).Length; + + if (fileSize != 0L) + { + Logger.Info?.Print(LogClass.Ptc, $"Saved Translation Cache (size: {fileSize} bytes, translated functions: {translatedFuncsCount})."); + } + } + + public void LoadTranslations(Translator translator) + { + if (AreCarriersEmpty()) + { + return; + } + + long infosStreamLength = _infosStream.Length; + long relocsStreamLength = _relocsStream.Length; + long unwindInfosStreamLength = _unwindInfosStream.Length; + + _infosStream.Seek(0L, SeekOrigin.Begin); + _relocsStream.Seek(0L, SeekOrigin.Begin); + _unwindInfosStream.Seek(0L, SeekOrigin.Begin); + + using (BinaryReader relocsReader = new(_relocsStream, EncodingCache.UTF8NoBOM, true)) + using (BinaryReader unwindInfosReader = new(_unwindInfosStream, EncodingCache.UTF8NoBOM, true)) + { + for (int index = 0; index < GetEntriesCount(); index++) + { + InfoEntry infoEntry = DeserializeStructure<InfoEntry>(_infosStream); + + if (infoEntry.Stubbed) + { + SkipCode(index, infoEntry.CodeLength); + SkipReloc(infoEntry.RelocEntriesCount); + SkipUnwindInfo(unwindInfosReader); + + continue; + } + + bool isEntryChanged = infoEntry.Hash != ComputeHash(translator.Memory, infoEntry.Address, infoEntry.GuestSize); + + if (isEntryChanged || (!infoEntry.HighCq && Profiler.ProfiledFuncs.TryGetValue(infoEntry.Address, out var value) && value.HighCq)) + { + infoEntry.Stubbed = true; + infoEntry.CodeLength = 0; + UpdateInfo(infoEntry); + + 
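+ // Stubbing note: an invalidated entry is neutralized in place rather than removed, so every following entry keeps its offset. StubCode below swaps in an empty code blob, while StubReloc/StubUnwindInfo overwrite the corresponding byte ranges with filler bytes.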
StubCode(index); + StubReloc(infoEntry.RelocEntriesCount); + StubUnwindInfo(unwindInfosReader); + + if (isEntryChanged) + { + Logger.Info?.Print(LogClass.Ptc, $"Invalidated translated function (address: 0x{infoEntry.Address:X16})"); + } + + continue; + } + + byte[] code = ReadCode(index, infoEntry.CodeLength); + + Counter<uint> callCounter = null; + + if (infoEntry.RelocEntriesCount != 0) + { + RelocEntry[] relocEntries = GetRelocEntries(relocsReader, infoEntry.RelocEntriesCount); + + PatchCode(translator, code, relocEntries, out callCounter); + } + + UnwindInfo unwindInfo = ReadUnwindInfo(unwindInfosReader); + + TranslatedFunction func = FastTranslate(code, callCounter, infoEntry.GuestSize, unwindInfo, infoEntry.HighCq); + + translator.RegisterFunction(infoEntry.Address, func); + + bool isAddressUnique = translator.Functions.TryAdd(infoEntry.Address, infoEntry.GuestSize, func); + + Debug.Assert(isAddressUnique, $"The address 0x{infoEntry.Address:X16} is not unique."); + } + } + + if (_infosStream.Length != infosStreamLength || _infosStream.Position != infosStreamLength || + _relocsStream.Length != relocsStreamLength || _relocsStream.Position != relocsStreamLength || + _unwindInfosStream.Length != unwindInfosStreamLength || _unwindInfosStream.Position != unwindInfosStreamLength) + { + throw new Exception("The length of a memory stream has changed, or its position has not reached or has exceeded its end."); + } + + Logger.Info?.Print(LogClass.Ptc, $"{translator.Functions.Count} translated functions loaded"); + } + + private int GetEntriesCount() + { + return _codesList.Count; + } + + [Conditional("DEBUG")] + private void SkipCode(int index, int codeLength) + { + Debug.Assert(_codesList[index].Length == 0); + Debug.Assert(codeLength == 0); + } + + private void SkipReloc(int relocEntriesCount) + { + _relocsStream.Seek(relocEntriesCount * RelocEntry.Stride, SeekOrigin.Current); + } + + private void SkipUnwindInfo(BinaryReader unwindInfosReader) + { + int pushEntriesLength = unwindInfosReader.ReadInt32(); + + _unwindInfosStream.Seek(pushEntriesLength * UnwindPushEntry.Stride + UnwindInfo.Stride, SeekOrigin.Current); + } + + private byte[] ReadCode(int index, int codeLength) + { + Debug.Assert(_codesList[index].Length == codeLength); + + return _codesList[index]; + } + + private RelocEntry[] GetRelocEntries(BinaryReader relocsReader, int relocEntriesCount) + { + RelocEntry[] relocEntries = new RelocEntry[relocEntriesCount]; + + for (int i = 0; i < relocEntriesCount; i++) + { + int position = relocsReader.ReadInt32(); + SymbolType type = (SymbolType)relocsReader.ReadByte(); + ulong value = relocsReader.ReadUInt64(); + + relocEntries[i] = new RelocEntry(position, new Symbol(type, value)); + } + + return relocEntries; + } + + private void PatchCode(Translator translator, Span<byte> code, RelocEntry[] relocEntries, out Counter<uint> callCounter) + { + callCounter = null; + + foreach (RelocEntry relocEntry in relocEntries) + { + IntPtr? 
imm = null; + Symbol symbol = relocEntry.Symbol; + + if (symbol.Type == SymbolType.FunctionTable) + { + ulong guestAddress = symbol.Value; + + if (translator.FunctionTable.IsValid(guestAddress)) + { + unsafe { imm = (IntPtr)Unsafe.AsPointer(ref translator.FunctionTable.GetValue(guestAddress)); } + } + } + else if (symbol.Type == SymbolType.DelegateTable) + { + int index = (int)symbol.Value; + + if (Delegates.TryGetDelegateFuncPtrByIndex(index, out IntPtr funcPtr)) + { + imm = funcPtr; + } + } + else if (symbol == PageTableSymbol) + { + imm = translator.Memory.PageTablePointer; + } + else if (symbol == CountTableSymbol) + { + if (callCounter == null) + { + callCounter = new Counter<uint>(translator.CountTable); + } + + unsafe { imm = (IntPtr)Unsafe.AsPointer(ref callCounter.Value); } + } + else if (symbol == DispatchStubSymbol) + { + imm = translator.Stubs.DispatchStub; + } + + if (imm == null) + { + throw new Exception($"Unexpected reloc entry {relocEntry}."); + } + + BinaryPrimitives.WriteUInt64LittleEndian(code.Slice(relocEntry.Position, 8), (ulong)imm.Value); + } + } + + private UnwindInfo ReadUnwindInfo(BinaryReader unwindInfosReader) + { + int pushEntriesLength = unwindInfosReader.ReadInt32(); + + UnwindPushEntry[] pushEntries = new UnwindPushEntry[pushEntriesLength]; + + for (int i = 0; i < pushEntriesLength; i++) + { + int pseudoOp = unwindInfosReader.ReadInt32(); + int prologOffset = unwindInfosReader.ReadInt32(); + int regIndex = unwindInfosReader.ReadInt32(); + int stackOffsetOrAllocSize = unwindInfosReader.ReadInt32(); + + pushEntries[i] = new UnwindPushEntry((UnwindPseudoOp)pseudoOp, prologOffset, regIndex, stackOffsetOrAllocSize); + } + + int prologueSize = unwindInfosReader.ReadInt32(); + + return new UnwindInfo(pushEntries, prologueSize); + } + + private TranslatedFunction FastTranslate( + byte[] code, + Counter<uint> callCounter, + ulong guestSize, + UnwindInfo unwindInfo, + bool highCq) + { + var cFunc = new CompiledFunction(code, unwindInfo, RelocInfo.Empty); + var gFunc = cFunc.MapWithPointer<GuestFunction>(out IntPtr gFuncPointer); + + return new TranslatedFunction(gFunc, gFuncPointer, callCounter, guestSize, highCq); + } + + private void UpdateInfo(InfoEntry infoEntry) + { + _infosStream.Seek(-Unsafe.SizeOf<InfoEntry>(), SeekOrigin.Current); + + SerializeStructure(_infosStream, infoEntry); + } + + private void StubCode(int index) + { + _codesList[index] = Array.Empty<byte>(); + } + + private void StubReloc(int relocEntriesCount) + { + for (int i = 0; i < relocEntriesCount * RelocEntry.Stride; i++) + { + _relocsStream.WriteByte(FillingByte); + } + } + + private void StubUnwindInfo(BinaryReader unwindInfosReader) + { + int pushEntriesLength = unwindInfosReader.ReadInt32(); + + for (int i = 0; i < pushEntriesLength * UnwindPushEntry.Stride + UnwindInfo.Stride; i++) + { + _unwindInfosStream.WriteByte(FillingByte); + } + } + + public void MakeAndSaveTranslations(Translator translator) + { + var profiledFuncsToTranslate = Profiler.GetProfiledFuncsToTranslate(translator.Functions); + + _translateCount = 0; + _translateTotalCount = profiledFuncsToTranslate.Count; + + if (_translateTotalCount == 0) + { + ResetCarriersIfNeeded(); + + GCSettings.LargeObjectHeapCompactionMode = GCLargeObjectHeapCompactionMode.CompactOnce; + + return; + } + + int degreeOfParallelism = Environment.ProcessorCount; + + // If there are enough cores lying around, we leave one alone for other tasks. 
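+ // For example, with this heuristic a machine exposing 8 logical processors runs 7 translation threads, while a 4-core machine keeps all 4.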
+ if (degreeOfParallelism > 4) + { + degreeOfParallelism--; + } + + Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {degreeOfParallelism}"); + + PtcStateChanged?.Invoke(PtcLoadingState.Start, _translateCount, _translateTotalCount); + + using AutoResetEvent progressReportEvent = new AutoResetEvent(false); + + Thread progressReportThread = new Thread(ReportProgress) + { + Name = "Ptc.ProgressReporter", + Priority = ThreadPriority.Lowest, + IsBackground = true + }; + + progressReportThread.Start(progressReportEvent); + + void TranslateFuncs() + { + while (profiledFuncsToTranslate.TryDequeue(out var item)) + { + ulong address = item.address; + + Debug.Assert(Profiler.IsAddressInStaticCodeRange(address)); + + TranslatedFunction func = translator.Translate(address, item.funcProfile.Mode, item.funcProfile.HighCq); + + bool isAddressUnique = translator.Functions.TryAdd(address, func.GuestSize, func); + + Debug.Assert(isAddressUnique, $"The address 0x{address:X16} is not unique."); + + Interlocked.Increment(ref _translateCount); + + translator.RegisterFunction(address, func); + + if (State != PtcState.Enabled) + { + break; + } + } + } + + List<Thread> threads = new List<Thread>(); + + for (int i = 0; i < degreeOfParallelism; i++) + { + Thread thread = new Thread(TranslateFuncs); + thread.IsBackground = true; + + threads.Add(thread); + } + + Stopwatch sw = Stopwatch.StartNew(); + + threads.ForEach((thread) => thread.Start()); + threads.ForEach((thread) => thread.Join()); + + threads.Clear(); + + progressReportEvent.Set(); + progressReportThread.Join(); + + sw.Stop(); + + PtcStateChanged?.Invoke(PtcLoadingState.Loaded, _translateCount, _translateTotalCount); + + Logger.Info?.Print(LogClass.Ptc, $"{_translateCount} of {_translateTotalCount} functions translated | Thread count: {degreeOfParallelism} in {sw.Elapsed.TotalSeconds} s"); + + Thread preSaveThread = new Thread(PreSave); + preSaveThread.IsBackground = true; + preSaveThread.Start(); + } + + private void ReportProgress(object state) + { + const int refreshRate = 50; // ms. + + AutoResetEvent endEvent = (AutoResetEvent)state; + + int count = 0; + + do + { + int newCount = _translateCount; + + if (count != newCount) + { + PtcStateChanged?.Invoke(PtcLoadingState.Loading, newCount, _translateTotalCount); + count = newCount; + } + } + while (!endEvent.WaitOne(refreshRate)); + } + + public static Hash128 ComputeHash(IMemoryManager memory, ulong address, ulong guestSize) + { + return XXHash128.ComputeHash(memory.GetSpan(address, checked((int)(guestSize)))); + } + + public void WriteCompiledFunction(ulong address, ulong guestSize, Hash128 hash, bool highCq, CompiledFunction compiledFunc) + { + lock (_lock) + { + byte[] code = compiledFunc.Code; + RelocInfo relocInfo = compiledFunc.RelocInfo; + UnwindInfo unwindInfo = compiledFunc.UnwindInfo; + + InfoEntry infoEntry = new InfoEntry(); + + infoEntry.Address = address; + infoEntry.GuestSize = guestSize; + infoEntry.Hash = hash; + infoEntry.HighCq = highCq; + infoEntry.Stubbed = false; + infoEntry.CodeLength = code.Length; + infoEntry.RelocEntriesCount = relocInfo.Entries.Length; + + SerializeStructure(_infosStream, infoEntry); + + WriteCode(code.AsSpan()); + + // WriteReloc. 
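+ // Each reloc entry is serialized as a fixed 13-byte record: a 4-byte position, a 1-byte symbol type and an 8-byte symbol value. RelocEntry.Stride has to match this size for SkipReloc/StubReloc to seek over entries correctly.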
+ using var relocInfoWriter = new BinaryWriter(_relocsStream, EncodingCache.UTF8NoBOM, true); + + foreach (RelocEntry entry in relocInfo.Entries) + { + relocInfoWriter.Write(entry.Position); + relocInfoWriter.Write((byte)entry.Symbol.Type); + relocInfoWriter.Write(entry.Symbol.Value); + } + + // WriteUnwindInfo. + using var unwindInfoWriter = new BinaryWriter(_unwindInfosStream, EncodingCache.UTF8NoBOM, true); + + unwindInfoWriter.Write(unwindInfo.PushEntries.Length); + + foreach (UnwindPushEntry unwindPushEntry in unwindInfo.PushEntries) + { + unwindInfoWriter.Write((int)unwindPushEntry.PseudoOp); + unwindInfoWriter.Write(unwindPushEntry.PrologOffset); + unwindInfoWriter.Write(unwindPushEntry.RegIndex); + unwindInfoWriter.Write(unwindPushEntry.StackOffsetOrAllocSize); + } + + unwindInfoWriter.Write(unwindInfo.PrologSize); + } + } + + private void WriteCode(ReadOnlySpan<byte> code) + { + _codesList.Add(code.ToArray()); + } + + public static bool GetEndianness() + { + return BitConverter.IsLittleEndian; + } + + private static FeatureInfo GetFeatureInfo() + { + if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64) + { + return new FeatureInfo( + (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap, + (ulong)Arm64HardwareCapabilities.LinuxFeatureInfoHwCap2, + (ulong)Arm64HardwareCapabilities.MacOsFeatureInfo, + 0, + 0); + } + else if (RuntimeInformation.ProcessArchitecture == Architecture.X64) + { + return new FeatureInfo( + (ulong)X86HardwareCapabilities.FeatureInfo1Ecx, + (ulong)X86HardwareCapabilities.FeatureInfo1Edx, + (ulong)X86HardwareCapabilities.FeatureInfo7Ebx, + (ulong)X86HardwareCapabilities.FeatureInfo7Ecx, + (ulong)X86HardwareCapabilities.Xcr0InfoEax); + } + else + { + return new FeatureInfo(0, 0, 0, 0, 0); + } + } + + private byte GetMemoryManagerMode() + { + return (byte)_memoryMode; + } + + private static uint GetOSPlatform() + { + uint osPlatform = 0u; + + osPlatform |= (OperatingSystem.IsFreeBSD() ? 1u : 0u) << 0; + osPlatform |= (OperatingSystem.IsLinux() ? 1u : 0u) << 1; + osPlatform |= (OperatingSystem.IsMacOS() ? 1u : 0u) << 2; + osPlatform |= (OperatingSystem.IsWindows() ? 
1u : 0u) << 3; + + return osPlatform; + } + + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 86*/)] + private struct OuterHeader + { + public ulong Magic; + + public uint CacheFileVersion; + + public bool Endianness; + public FeatureInfo FeatureInfo; + public byte MemoryManagerMode; + public uint OSPlatform; + public uint Architecture; + + public long UncompressedStreamSize; + + public Hash128 HeaderHash; + + public void SetHeaderHash() + { + Span<OuterHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1); + + HeaderHash = XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>())); + } + + public bool IsHeaderValid() + { + Span<OuterHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1); + + return XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>())) == HeaderHash; + } + } + + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 40*/)] + private record struct FeatureInfo(ulong FeatureInfo0, ulong FeatureInfo1, ulong FeatureInfo2, ulong FeatureInfo3, ulong FeatureInfo4); + + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 128*/)] + private struct InnerHeader + { + public ulong Magic; + + public int InfosLength; + public long CodesLength; + public int RelocsLength; + public int UnwindInfosLength; + + public Hash128 InfosHash; + public Hash128 CodesHash; + public Hash128 RelocsHash; + public Hash128 UnwindInfosHash; + + public Hash128 HeaderHash; + + public void SetHeaderHash() + { + Span<InnerHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1); + + HeaderHash = XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<InnerHeader>() - Unsafe.SizeOf<Hash128>())); + } + + public bool IsHeaderValid() + { + Span<InnerHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1); + + return XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<InnerHeader>() - Unsafe.SizeOf<Hash128>())) == HeaderHash; + } + } + + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 42*/)] + private struct InfoEntry + { + public ulong Address; + public ulong GuestSize; + public Hash128 Hash; + public bool HighCq; + public bool Stubbed; + public int CodeLength; + public int RelocEntriesCount; + } + + private void Enable() + { + State = PtcState.Enabled; + } + + public void Continue() + { + if (State == PtcState.Enabled) + { + State = PtcState.Continuing; + } + } + + public void Close() + { + if (State == PtcState.Enabled || + State == PtcState.Continuing) + { + State = PtcState.Closing; + } + } + + public void Disable() + { + State = PtcState.Disabled; + } + + private void Wait() + { + _waitEvent.WaitOne(); + } + + public void Dispose() + { + if (!_disposed) + { + _disposed = true; + + Wait(); + _waitEvent.Dispose(); + + DisposeCarriers(); + } + } + } +} diff --git a/src/ARMeilleure/Translation/PTC/PtcFormatter.cs b/src/ARMeilleure/Translation/PTC/PtcFormatter.cs new file mode 100644 index 00000000..2f7a9c21 --- /dev/null +++ b/src/ARMeilleure/Translation/PTC/PtcFormatter.cs @@ -0,0 +1,179 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace ARMeilleure.Translation.PTC +{ + static class PtcFormatter + { + #region "Deserialize" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Dictionary<TKey, TValue> DeserializeDictionary<TKey, TValue>(Stream stream, Func<Stream, TValue> 
valueFunc) where TKey : struct + { + Dictionary<TKey, TValue> dictionary = new(); + + int count = DeserializeStructure<int>(stream); + + for (int i = 0; i < count; i++) + { + TKey key = DeserializeStructure<TKey>(stream); + TValue value = valueFunc(stream); + + dictionary.Add(key, value); + } + + return dictionary; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static List<T> DeserializeList<T>(Stream stream) where T : struct + { + List<T> list = new(); + + int count = DeserializeStructure<int>(stream); + + for (int i = 0; i < count; i++) + { + T item = DeserializeStructure<T>(stream); + + list.Add(item); + } + + return list; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static T DeserializeStructure<T>(Stream stream) where T : struct + { + T structure = default(T); + + Span<T> spanT = MemoryMarshal.CreateSpan(ref structure, 1); + int bytesCount = stream.Read(MemoryMarshal.AsBytes(spanT)); + + if (bytesCount != Unsafe.SizeOf<T>()) + { + throw new EndOfStreamException(); + } + + return structure; + } + #endregion + + #region "GetSerializeSize" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetSerializeSizeDictionary<TKey, TValue>(Dictionary<TKey, TValue> dictionary, Func<TValue, int> valueFunc) where TKey : struct + { + int size = 0; + + size += Unsafe.SizeOf<int>(); + + foreach ((_, TValue value) in dictionary) + { + size += Unsafe.SizeOf<TKey>(); + size += valueFunc(value); + } + + return size; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static int GetSerializeSizeList<T>(List<T> list) where T : struct + { + int size = 0; + + size += Unsafe.SizeOf<int>(); + + size += list.Count * Unsafe.SizeOf<T>(); + + return size; + } + #endregion + + #region "Serialize" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void SerializeDictionary<TKey, TValue>(Stream stream, Dictionary<TKey, TValue> dictionary, Action<Stream, TValue> valueAction) where TKey : struct + { + SerializeStructure<int>(stream, dictionary.Count); + + foreach ((TKey key, TValue value) in dictionary) + { + SerializeStructure<TKey>(stream, key); + valueAction(stream, value); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void SerializeList<T>(Stream stream, List<T> list) where T : struct + { + SerializeStructure<int>(stream, list.Count); + + foreach (T item in list) + { + SerializeStructure<T>(stream, item); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void SerializeStructure<T>(Stream stream, T structure) where T : struct + { + Span<T> spanT = MemoryMarshal.CreateSpan(ref structure, 1); + stream.Write(MemoryMarshal.AsBytes(spanT)); + } + #endregion + + #region "Extension methods" + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void ReadFrom<T>(this List<T[]> list, Stream stream) where T : struct + { + int count = DeserializeStructure<int>(stream); + + for (int i = 0; i < count; i++) + { + int itemLength = DeserializeStructure<int>(stream); + + T[] item = new T[itemLength]; + + int bytesCount = stream.Read(MemoryMarshal.AsBytes(item.AsSpan())); + + if (bytesCount != itemLength) + { + throw new EndOfStreamException(); + } + + list.Add(item); + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static long Length<T>(this List<T[]> list) where T : struct + { + long size = 0L; + + size += Unsafe.SizeOf<int>(); + + foreach (T[] item in list) + { + size += Unsafe.SizeOf<int>(); + size += item.Length; + } + + return size; + } 
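+ // Illustrative round trip (a sketch, not part of the sources): a List<byte[]> written with WriteTo below occupies exactly Length() bytes and can be read back with ReadFrom: + // + // List<byte[]> codes = new() { new byte[] { 0x90 } }; + // using MemoryStream ms = new(); + // codes.WriteTo(ms); + // Debug.Assert(ms.Length == codes.Length()); + // ms.Seek(0L, SeekOrigin.Begin); + // List<byte[]> restored = new(); + // restored.ReadFrom(ms);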
+ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void WriteTo<T>(this List<T[]> list, Stream stream) where T : struct + { + SerializeStructure<int>(stream, list.Count); + + foreach (T[] item in list) + { + SerializeStructure<int>(stream, item.Length); + + stream.Write(MemoryMarshal.AsBytes(item.AsSpan())); + } + } + #endregion + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/PTC/PtcLoadingState.cs b/src/ARMeilleure/Translation/PTC/PtcLoadingState.cs new file mode 100644 index 00000000..526cf91f --- /dev/null +++ b/src/ARMeilleure/Translation/PTC/PtcLoadingState.cs @@ -0,0 +1,9 @@ +namespace ARMeilleure.Translation.PTC +{ + public enum PtcLoadingState + { + Start, + Loading, + Loaded + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/PTC/PtcProfiler.cs b/src/ARMeilleure/Translation/PTC/PtcProfiler.cs new file mode 100644 index 00000000..391e29c7 --- /dev/null +++ b/src/ARMeilleure/Translation/PTC/PtcProfiler.cs @@ -0,0 +1,421 @@ +using ARMeilleure.State; +using Ryujinx.Common; +using Ryujinx.Common.Logging; +using Ryujinx.Common.Memory; +using System; +using System.Buffers.Binary; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Diagnostics; +using System.IO; +using System.IO.Compression; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Threading; + +using static ARMeilleure.Translation.PTC.PtcFormatter; + +namespace ARMeilleure.Translation.PTC +{ + class PtcProfiler + { + private const string OuterHeaderMagicString = "Pohd\0\0\0\0"; + + private const uint InternalVersion = 1866; //! To be incremented manually for each change to the ARMeilleure project. + + private const int SaveInterval = 30; // Seconds. + + private const CompressionLevel SaveCompressionLevel = CompressionLevel.Fastest; + + private readonly Ptc _ptc; + + private readonly System.Timers.Timer _timer; + + private readonly ulong _outerHeaderMagic; + + private readonly ManualResetEvent _waitEvent; + + private readonly object _lock; + + private bool _disposed; + + private Hash128 _lastHash; + + public Dictionary<ulong, FuncProfile> ProfiledFuncs { get; private set; } + + public bool Enabled { get; private set; } + + public ulong StaticCodeStart { get; set; } + public ulong StaticCodeSize { get; set; } + + public PtcProfiler(Ptc ptc) + { + _ptc = ptc; + + _timer = new System.Timers.Timer((double)SaveInterval * 1000d); + _timer.Elapsed += PreSave; + + _outerHeaderMagic = BinaryPrimitives.ReadUInt64LittleEndian(EncodingCache.UTF8NoBOM.GetBytes(OuterHeaderMagicString).AsSpan()); + + _waitEvent = new ManualResetEvent(true); + + _lock = new object(); + + _disposed = false; + + ProfiledFuncs = new Dictionary<ulong, FuncProfile>(); + + Enabled = false; + } + + public void AddEntry(ulong address, ExecutionMode mode, bool highCq) + { + if (IsAddressInStaticCodeRange(address)) + { + Debug.Assert(!highCq); + + lock (_lock) + { + ProfiledFuncs.TryAdd(address, new FuncProfile(mode, highCq: false)); + } + } + } + + public void UpdateEntry(ulong address, ExecutionMode mode, bool highCq) + { + if (IsAddressInStaticCodeRange(address)) + { + Debug.Assert(highCq); + + lock (_lock) + { + Debug.Assert(ProfiledFuncs.ContainsKey(address)); + + ProfiledFuncs[address] = new FuncProfile(mode, highCq: true); + } + } + } + + public bool IsAddressInStaticCodeRange(ulong address) + { + return address >= StaticCodeStart && address < StaticCodeStart + StaticCodeSize; + } + + public ConcurrentQueue<(ulong address, FuncProfile funcProfile)> GetProfiledFuncsToTranslate(TranslatorCache<TranslatedFunction> funcs) + { + var profiledFuncsToTranslate = new ConcurrentQueue<(ulong address, FuncProfile funcProfile)>(); + + foreach (var profiledFunc in ProfiledFuncs) + { + if (!funcs.ContainsKey(profiledFunc.Key)) + { + profiledFuncsToTranslate.Enqueue((profiledFunc.Key, profiledFunc.Value)); + } + } + + return profiledFuncsToTranslate; + } + + public void ClearEntries() + { + ProfiledFuncs.Clear(); + ProfiledFuncs.TrimExcess(); + } + + public void PreLoad() + { + _lastHash = default; + + string fileNameActual = $"{_ptc.CachePathActual}.info"; + string fileNameBackup = $"{_ptc.CachePathBackup}.info"; + + FileInfo fileInfoActual = new
FileInfo(fileNameActual); + FileInfo fileInfoBackup = new FileInfo(fileNameBackup); + + if (fileInfoActual.Exists && fileInfoActual.Length != 0L) + { + if (!Load(fileNameActual, false)) + { + if (fileInfoBackup.Exists && fileInfoBackup.Length != 0L) + { + Load(fileNameBackup, true); + } + } + } + else if (fileInfoBackup.Exists && fileInfoBackup.Length != 0L) + { + Load(fileNameBackup, true); + } + } + + private bool Load(string fileName, bool isBackup) + { + using (FileStream compressedStream = new(fileName, FileMode.Open)) + using (DeflateStream deflateStream = new(compressedStream, CompressionMode.Decompress, true)) + { + OuterHeader outerHeader = DeserializeStructure<OuterHeader>(compressedStream); + + if (!outerHeader.IsHeaderValid()) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.Magic != _outerHeaderMagic) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.InfoFileVersion != InternalVersion) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + if (outerHeader.Endianness != Ptc.GetEndianness()) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + using (MemoryStream stream = MemoryStreamManager.Shared.GetStream()) + { + Debug.Assert(stream.Seek(0L, SeekOrigin.Begin) == 0L && stream.Length == 0L); + + try + { + deflateStream.CopyTo(stream); + } + catch + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + Debug.Assert(stream.Position == stream.Length); + + stream.Seek(0L, SeekOrigin.Begin); + + Hash128 expectedHash = DeserializeStructure<Hash128>(stream); + + Hash128 actualHash = XXHash128.ComputeHash(GetReadOnlySpan(stream)); + + if (actualHash != expectedHash) + { + InvalidateCompressedStream(compressedStream); + + return false; + } + + ProfiledFuncs = Deserialize(stream); + + Debug.Assert(stream.Position == stream.Length); + + _lastHash = actualHash; + } + } + + long fileSize = new FileInfo(fileName).Length; + + Logger.Info?.Print(LogClass.Ptc, $"{(isBackup ? 
"Loaded Backup Profiling Info" : "Loaded Profiling Info")} (size: {fileSize} bytes, profiled functions: {ProfiledFuncs.Count})."); + + return true; + } + + private static Dictionary<ulong, FuncProfile> Deserialize(Stream stream) + { + return DeserializeDictionary<ulong, FuncProfile>(stream, (stream) => DeserializeStructure<FuncProfile>(stream)); + } + + private ReadOnlySpan<byte> GetReadOnlySpan(MemoryStream memoryStream) + { + return new(memoryStream.GetBuffer(), (int)memoryStream.Position, (int)memoryStream.Length - (int)memoryStream.Position); + } + + private void InvalidateCompressedStream(FileStream compressedStream) + { + compressedStream.SetLength(0L); + } + + private void PreSave(object source, System.Timers.ElapsedEventArgs e) + { + _waitEvent.Reset(); + + string fileNameActual = $"{_ptc.CachePathActual}.info"; + string fileNameBackup = $"{_ptc.CachePathBackup}.info"; + + FileInfo fileInfoActual = new FileInfo(fileNameActual); + + if (fileInfoActual.Exists && fileInfoActual.Length != 0L) + { + File.Copy(fileNameActual, fileNameBackup, true); + } + + Save(fileNameActual); + + _waitEvent.Set(); + } + + private void Save(string fileName) + { + int profiledFuncsCount; + + OuterHeader outerHeader = new OuterHeader(); + + outerHeader.Magic = _outerHeaderMagic; + + outerHeader.InfoFileVersion = InternalVersion; + outerHeader.Endianness = Ptc.GetEndianness(); + + outerHeader.SetHeaderHash(); + + using (MemoryStream stream = MemoryStreamManager.Shared.GetStream()) + { + Debug.Assert(stream.Seek(0L, SeekOrigin.Begin) == 0L && stream.Length == 0L); + + stream.Seek((long)Unsafe.SizeOf<Hash128>(), SeekOrigin.Begin); + + lock (_lock) + { + Serialize(stream, ProfiledFuncs); + + profiledFuncsCount = ProfiledFuncs.Count; + } + + Debug.Assert(stream.Position == stream.Length); + + stream.Seek((long)Unsafe.SizeOf<Hash128>(), SeekOrigin.Begin); + Hash128 hash = XXHash128.ComputeHash(GetReadOnlySpan(stream)); + + stream.Seek(0L, SeekOrigin.Begin); + SerializeStructure(stream, hash); + + if (hash == _lastHash) + { + return; + } + + using (FileStream compressedStream = new(fileName, FileMode.OpenOrCreate)) + using (DeflateStream deflateStream = new(compressedStream, SaveCompressionLevel, true)) + { + try + { + SerializeStructure(compressedStream, outerHeader); + + stream.WriteTo(deflateStream); + + _lastHash = hash; + } + catch + { + compressedStream.Position = 0L; + + _lastHash = default; + } + + if (compressedStream.Position < compressedStream.Length) + { + compressedStream.SetLength(compressedStream.Position); + } + } + } + + long fileSize = new FileInfo(fileName).Length; + + if (fileSize != 0L) + { + Logger.Info?.Print(LogClass.Ptc, $"Saved Profiling Info (size: {fileSize} bytes, profiled functions: {profiledFuncsCount})."); + } + } + + private void Serialize(Stream stream, Dictionary<ulong, FuncProfile> profiledFuncs) + { + SerializeDictionary(stream, profiledFuncs, (stream, structure) => SerializeStructure(stream, structure)); + } + + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 29*/)] + private struct OuterHeader + { + public ulong Magic; + + public uint InfoFileVersion; + + public bool Endianness; + + public Hash128 HeaderHash; + + public void SetHeaderHash() + { + Span<OuterHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1); + + HeaderHash = XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>())); + } + + public bool IsHeaderValid() + { + Span<OuterHeader> spanHeader = MemoryMarshal.CreateSpan(ref this, 1); + 
+ return XXHash128.ComputeHash(MemoryMarshal.AsBytes(spanHeader).Slice(0, Unsafe.SizeOf<OuterHeader>() - Unsafe.SizeOf<Hash128>())) == HeaderHash; + } + } + + [StructLayout(LayoutKind.Sequential, Pack = 1/*, Size = 5*/)] + public struct FuncProfile + { + public ExecutionMode Mode; + public bool HighCq; + + public FuncProfile(ExecutionMode mode, bool highCq) + { + Mode = mode; + HighCq = highCq; + } + } + + public void Start() + { + if (_ptc.State == PtcState.Enabled || + _ptc.State == PtcState.Continuing) + { + Enabled = true; + + _timer.Enabled = true; + } + } + + public void Stop() + { + Enabled = false; + + if (!_disposed) + { + _timer.Enabled = false; + } + } + + public void Wait() + { + _waitEvent.WaitOne(); + } + + public void Dispose() + { + if (!_disposed) + { + _disposed = true; + + _timer.Elapsed -= PreSave; + _timer.Dispose(); + + Wait(); + _waitEvent.Dispose(); + } + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/PTC/PtcState.cs b/src/ARMeilleure/Translation/PTC/PtcState.cs new file mode 100644 index 00000000..ca4f4108 --- /dev/null +++ b/src/ARMeilleure/Translation/PTC/PtcState.cs @@ -0,0 +1,10 @@ +namespace ARMeilleure.Translation.PTC +{ + enum PtcState + { + Enabled, + Continuing, + Closing, + Disabled + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/RegisterToLocal.cs b/src/ARMeilleure/Translation/RegisterToLocal.cs new file mode 100644 index 00000000..abb9b373 --- /dev/null +++ b/src/ARMeilleure/Translation/RegisterToLocal.cs @@ -0,0 +1,52 @@ +using ARMeilleure.IntermediateRepresentation; +using System.Collections.Generic; + +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Translation +{ + static class RegisterToLocal + { + public static void Rename(ControlFlowGraph cfg) + { + Dictionary<Register, Operand> registerToLocalMap = new Dictionary<Register, Operand>(); + + Operand GetLocal(Operand op) + { + Register register = op.GetRegister(); + + if (!registerToLocalMap.TryGetValue(register, out Operand local)) + { + local = Local(op.Type); + + registerToLocalMap.Add(register, local); + } + + return local; + } + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + Operand dest = node.Destination; + + if (dest != default && dest.Kind == OperandKind.Register) + { + node.Destination = GetLocal(dest); + } + + for (int index = 0; index < node.SourcesCount; index++) + { + Operand source = node.GetSource(index); + + if (source.Kind == OperandKind.Register) + { + node.SetSource(index, GetLocal(source)); + } + } + } + } + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/RegisterUsage.cs b/src/ARMeilleure/Translation/RegisterUsage.cs new file mode 100644 index 00000000..3ec0a7b4 --- /dev/null +++ b/src/ARMeilleure/Translation/RegisterUsage.cs @@ -0,0 +1,394 @@ +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using System; +using System.Numerics; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.Translation +{ + static class RegisterUsage + { + private const int RegsCount = 32; + private const int RegsMask = RegsCount - 1; + + private readonly struct RegisterMask : IEquatable<RegisterMask> + { + public long IntMask => Mask.GetElement(0); + public long VecMask => Mask.GetElement(1); + + public Vector128<long> Mask { get; } + + public RegisterMask(Vector128<long> mask) + { + Mask = mask; + } + + public RegisterMask(long intMask, long vecMask) + { + Mask = Vector128.Create(intMask, vecMask); + } + + public static RegisterMask operator &(RegisterMask x, RegisterMask y) + { + if (Sse2.IsSupported) + { + return new RegisterMask(Sse2.And(x.Mask, y.Mask)); + } + + return new RegisterMask(x.IntMask & y.IntMask, x.VecMask & y.VecMask); + } + + public static RegisterMask operator |(RegisterMask x, RegisterMask y) + { + if (Sse2.IsSupported) + { + return new RegisterMask(Sse2.Or(x.Mask, y.Mask)); + } + + return new RegisterMask(x.IntMask | y.IntMask, x.VecMask | y.VecMask); + } + + public static RegisterMask operator ~(RegisterMask x) + { + if (Sse2.IsSupported) + { + return new RegisterMask(Sse2.AndNot(x.Mask, Vector128<long>.AllBitsSet)); + } + + return new RegisterMask(~x.IntMask, ~x.VecMask); + } + + public static bool operator ==(RegisterMask x, RegisterMask y) + { + return x.Equals(y); + } + + public static bool operator !=(RegisterMask x, RegisterMask y) + { + return !x.Equals(y); + } + + public override bool Equals(object obj) + { + return obj is RegisterMask regMask && Equals(regMask); + } + + public bool Equals(RegisterMask other) + { + return Mask.Equals(other.Mask); + } + + public override int GetHashCode() + { + return Mask.GetHashCode(); + } + } + + public static void RunPass(ControlFlowGraph cfg, ExecutionMode mode) + { + // Compute local register inputs and outputs used inside blocks. + RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Count]; + RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Count]; + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + for (int index = 0; index < node.SourcesCount; index++) + { + Operand source = node.GetSource(index); + + if (source.Kind == OperandKind.Register) + { + Register register = source.GetRegister(); + + localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index]; + } + } + + if (node.Destination != default && node.Destination.Kind == OperandKind.Register) + { + localOutputs[block.Index] |= GetMask(node.Destination.GetRegister()); + } + } + } + + // Compute global register inputs and outputs used across blocks. 
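+ // Informally, the fixed point below computes per block: the registers every path into the block is guaranteed to have written (globalCmnOutputs, an intersection over predecessors), the registers any path may have written (globalOutputs, a union), and the registers the block or its successors consume without such a guarantee (globalInputs). Both passes repeat until no mask changes.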
+ RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Count]; + + RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Count]; + RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Count]; + + bool modified; + bool firstPass = true; + + do + { + modified = false; + + // Compute register outputs. + for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + if (block.Predecessors.Count != 0 && !HasContextLoad(block)) + { + BasicBlock predecessor = block.Predecessors[0]; + + RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index]; + RegisterMask outputs = globalOutputs[predecessor.Index]; + + for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++) + { + predecessor = block.Predecessors[pIndex]; + + cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index]; + outputs |= globalOutputs[predecessor.Index]; + } + + globalInputs[block.Index] |= outputs & ~cmnOutputs; + + if (!firstPass) + { + cmnOutputs &= globalCmnOutputs[block.Index]; + } + + modified |= Exchange(globalCmnOutputs, block.Index, cmnOutputs); + outputs |= localOutputs[block.Index]; + modified |= Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs); + } + else + { + modified |= Exchange(globalOutputs, block.Index, localOutputs[block.Index]); + } + } + + // Compute register inputs. + for (int index = 0; index < cfg.PostOrderBlocks.Length; index++) + { + BasicBlock block = cfg.PostOrderBlocks[index]; + + RegisterMask inputs = localInputs[block.Index]; + + for (int i = 0; i < block.SuccessorsCount; i++) + { + inputs |= globalInputs[block.GetSuccessor(i).Index]; + } + + inputs &= ~globalCmnOutputs[block.Index]; + + modified |= Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs); + } + + firstPass = false; + } + while (modified); + + // Insert load and store context instructions where needed. + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + bool hasContextLoad = HasContextLoad(block); + + if (hasContextLoad) + { + block.Operations.Remove(block.Operations.First); + } + + Operand arg = default; + + // The only block without any predecessor should be the entry block. + // It always needs a context load as it is the first block to run. 
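+ // In IR terms, LoadLocals emits one pair per live register right after the LoadArgument emitted below: addr = add arg0, NativeContext.GetRegisterOffset(reg); reg = load addr. StoreLocals mirrors this with a store at block exits.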
+ if (block.Predecessors.Count == 0 || hasContextLoad) + { + long vecMask = globalInputs[block.Index].VecMask; + long intMask = globalInputs[block.Index].IntMask; + + if (vecMask != 0 || intMask != 0) + { + arg = Local(OperandType.I64); + + Operation loadArg = block.Operations.AddFirst(Operation(Instruction.LoadArgument, arg, Const(0))); + + LoadLocals(block, vecMask, RegisterType.Vector, mode, loadArg, arg); + LoadLocals(block, intMask, RegisterType.Integer, mode, loadArg, arg); + } + } + + bool hasContextStore = HasContextStore(block); + + if (hasContextStore) + { + block.Operations.Remove(block.Operations.Last); + } + + if (EndsWithReturn(block) || hasContextStore) + { + long vecMask = globalOutputs[block.Index].VecMask; + long intMask = globalOutputs[block.Index].IntMask; + + if (vecMask != 0 || intMask != 0) + { + if (arg == default) + { + arg = Local(OperandType.I64); + + block.Append(Operation(Instruction.LoadArgument, arg, Const(0))); + } + + StoreLocals(block, intMask, RegisterType.Integer, mode, arg); + StoreLocals(block, vecMask, RegisterType.Vector, mode, arg); + } + } + } + } + + private static bool HasContextLoad(BasicBlock block) + { + return StartsWith(block, Instruction.LoadFromContext) && block.Operations.First.SourcesCount == 0; + } + + private static bool HasContextStore(BasicBlock block) + { + return EndsWith(block, Instruction.StoreToContext) && block.Operations.Last.SourcesCount == 0; + } + + private static bool StartsWith(BasicBlock block, Instruction inst) + { + if (block.Operations.Count > 0) + { + Operation first = block.Operations.First; + + return first != default && first.Instruction == inst; + } + + return false; + } + + private static bool EndsWith(BasicBlock block, Instruction inst) + { + if (block.Operations.Count > 0) + { + Operation last = block.Operations.Last; + + return last != default && last.Instruction == inst; + } + + return false; + } + + private static RegisterMask GetMask(Register register) + { + long intMask = 0; + long vecMask = 0; + + switch (register.Type) + { + case RegisterType.Flag: intMask = (1L << RegsCount) << register.Index; break; + case RegisterType.Integer: intMask = 1L << register.Index; break; + case RegisterType.FpFlag: vecMask = (1L << RegsCount) << register.Index; break; + case RegisterType.Vector: vecMask = 1L << register.Index; break; + } + + return new RegisterMask(intMask, vecMask); + } + + private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value) + { + ref RegisterMask curValue = ref masks[blkIndex]; + + bool changed = curValue != value; + + curValue = value; + + return changed; + } + + private static void LoadLocals( + BasicBlock block, + long inputs, + RegisterType baseType, + ExecutionMode mode, + Operation loadArg, + Operand arg) + { + while (inputs != 0) + { + int bit = 63 - BitOperations.LeadingZeroCount((ulong)inputs); + + Operand dest = GetRegFromBit(bit, baseType, mode); + Operand offset = Const((long)NativeContext.GetRegisterOffset(dest.GetRegister())); + Operand addr = Local(OperandType.I64); + + block.Operations.AddAfter(loadArg, Operation(Instruction.Load, dest, addr)); + block.Operations.AddAfter(loadArg, Operation(Instruction.Add, addr, arg, offset)); + + inputs &= ~(1L << bit); + } + } + + private static void StoreLocals( + BasicBlock block, + long outputs, + RegisterType baseType, + ExecutionMode mode, + Operand arg) + { + while (outputs != 0) + { + int bit = BitOperations.TrailingZeroCount(outputs); + + Operand source = GetRegFromBit(bit, baseType, mode); + Operand offset = 
Const((long)NativeContext.GetRegisterOffset(source.GetRegister())); + Operand addr = Local(OperandType.I64); + + block.Append(Operation(Instruction.Add, addr, arg, offset)); + block.Append(Operation(Instruction.Store, default, addr, source)); + + outputs &= ~(1L << bit); + } + } + + private static Operand GetRegFromBit(int bit, RegisterType baseType, ExecutionMode mode) + { + if (bit < RegsCount) + { + return Register(bit, baseType, GetOperandType(baseType, mode)); + } + else if (baseType == RegisterType.Integer) + { + return Register(bit & RegsMask, RegisterType.Flag, OperandType.I32); + } + else if (baseType == RegisterType.Vector) + { + return Register(bit & RegsMask, RegisterType.FpFlag, OperandType.I32); + } + else + { + throw new ArgumentOutOfRangeException(nameof(bit)); + } + } + + private static OperandType GetOperandType(RegisterType type, ExecutionMode mode) + { + switch (type) + { + case RegisterType.Flag: return OperandType.I32; + case RegisterType.FpFlag: return OperandType.I32; + case RegisterType.Integer: return (mode == ExecutionMode.Aarch64) ? OperandType.I64 : OperandType.I32; + case RegisterType.Vector: return OperandType.V128; + } + + throw new ArgumentException($"Invalid register type \"{type}\"."); + } + + private static bool EndsWithReturn(BasicBlock block) + { + Operation last = block.Operations.Last; + + return last != default && last.Instruction == Instruction.Return; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/RejitRequest.cs b/src/ARMeilleure/Translation/RejitRequest.cs new file mode 100644 index 00000000..1bed5c0a --- /dev/null +++ b/src/ARMeilleure/Translation/RejitRequest.cs @@ -0,0 +1,16 @@ +using ARMeilleure.State; + +namespace ARMeilleure.Translation +{ + struct RejitRequest + { + public ulong Address; + public ExecutionMode Mode; + + public RejitRequest(ulong address, ExecutionMode mode) + { + Address = address; + Mode = mode; + } + } +} diff --git a/src/ARMeilleure/Translation/SsaConstruction.cs b/src/ARMeilleure/Translation/SsaConstruction.cs new file mode 100644 index 00000000..2b6efc11 --- /dev/null +++ b/src/ARMeilleure/Translation/SsaConstruction.cs @@ -0,0 +1,289 @@ +using ARMeilleure.Common; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Translation +{ + static partial class Ssa + { + private class DefMap + { + private readonly Dictionary<int, Operand> _map; + private readonly BitMap _phiMasks; + + public DefMap() + { + _map = new Dictionary<int, Operand>(); + _phiMasks = new BitMap(Allocators.Default, RegisterConsts.TotalCount); + } + + public bool TryAddOperand(int key, Operand operand) + { + return _map.TryAdd(key, operand); + } + + public bool TryGetOperand(int key, out Operand operand) + { + return _map.TryGetValue(key, out operand); + } + + public bool AddPhi(int key) + { + return _phiMasks.Set(key); + } + + public bool HasPhi(int key) + { + return _phiMasks.IsSet(key); + } + } + + public static void Construct(ControlFlowGraph cfg) + { + var globalDefs = new DefMap[cfg.Blocks.Count]; + var localDefs = new Operand[cfg.LocalsCount + RegisterConsts.TotalCount]; + + var dfPhiBlocks = new Queue<BasicBlock>(); + + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + globalDefs[block.Index] = new DefMap(); + } + + // First pass, get all defs and locals uses. + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + for (int index = 0; index < node.SourcesCount; index++) + { + Operand src = node.GetSource(index); + + if (TryGetId(src, out int srcKey)) + { + Operand local = localDefs[srcKey]; + + if (local == default) + { + local = src; + } + + node.SetSource(index, local); + } + } + + Operand dest = node.Destination; + + if (TryGetId(dest, out int destKey)) + { + Operand local = Local(dest.Type); + + localDefs[destKey] = local; + + node.Destination = local; + } + } + + for (int key = 0; key < localDefs.Length; key++) + { + Operand local = localDefs[key]; + + if (local == default) + { + continue; + } + + globalDefs[block.Index].TryAddOperand(key, local); + + dfPhiBlocks.Enqueue(block); + + while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock)) + { + foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers) + { + if (globalDefs[domFrontier.Index].AddPhi(key)) + { + dfPhiBlocks.Enqueue(domFrontier); + } + } + } + } + + Array.Clear(localDefs); + } + + // Second pass, rename variables with definitions on different blocks. 
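+ // A use with no definition in its own block is resolved by FindDef: walk up the immediate-dominator chain to the closest definition, materializing a phi on the way at any block whose mask was flagged for this variable during the first pass.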
+ for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + for (Operation node = block.Operations.First; node != default; node = node.ListNext) + { + for (int index = 0; index < node.SourcesCount; index++) + { + Operand src = node.GetSource(index); + + if (TryGetId(src, out int key)) + { + Operand local = localDefs[key]; + + if (local == default) + { + local = FindDef(globalDefs, block, src); + localDefs[key] = local; + } + + node.SetSource(index, local); + } + } + } + + Array.Clear(localDefs); + } + } + + private static Operand FindDef(DefMap[] globalDefs, BasicBlock current, Operand operand) + { + if (globalDefs[current.Index].HasPhi(GetId(operand))) + { + return InsertPhi(globalDefs, current, operand); + } + + if (current != current.ImmediateDominator) + { + return FindDefOnPred(globalDefs, current.ImmediateDominator, operand); + } + + return Undef(); + } + + private static Operand FindDefOnPred(DefMap[] globalDefs, BasicBlock current, Operand operand) + { + BasicBlock previous; + + do + { + DefMap defMap = globalDefs[current.Index]; + + int key = GetId(operand); + + if (defMap.TryGetOperand(key, out Operand lastDef)) + { + return lastDef; + } + + if (defMap.HasPhi(key)) + { + return InsertPhi(globalDefs, current, operand); + } + + previous = current; + current = current.ImmediateDominator; + } + while (previous != current); + + return Undef(); + } + + private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Operand operand) + { + // This block has a Phi that has not been materialized yet, but that + // would define a new version of the variable we're looking for. We need + // to materialize the Phi, add all the block/operand pairs into the Phi, and + // then use the definition from that Phi. + Operand local = Local(operand.Type); + + Operation operation = Operation.Factory.PhiOperation(local, block.Predecessors.Count); + + AddPhi(block, operation); + + globalDefs[block.Index].TryAddOperand(GetId(operand), local); + + PhiOperation phi = operation.AsPhi(); + + for (int index = 0; index < block.Predecessors.Count; index++) + { + BasicBlock predecessor = block.Predecessors[index]; + + phi.SetBlock(index, predecessor); + phi.SetSource(index, FindDefOnPred(globalDefs, predecessor, operand)); + } + + return local; + } + + private static void AddPhi(BasicBlock block, Operation phi) + { + Operation node = block.Operations.First; + + if (node != default) + { + while (node.ListNext != default && node.ListNext.Instruction == Instruction.Phi) + { + node = node.ListNext; + } + } + + if (node != default && node.Instruction == Instruction.Phi) + { + block.Operations.AddAfter(node, phi); + } + else + { + block.Operations.AddFirst(phi); + } + } + + private static bool TryGetId(Operand operand, out int result) + { + if (operand != default) + { + if (operand.Kind == OperandKind.Register) + { + Register reg = operand.GetRegister(); + + if (reg.Type == RegisterType.Integer) + { + result = reg.Index; + } + else if (reg.Type == RegisterType.Vector) + { + result = RegisterConsts.IntRegsCount + reg.Index; + } + else if (reg.Type == RegisterType.Flag) + { + result = RegisterConsts.IntAndVecRegsCount + reg.Index; + } + else /* if (reg.Type == RegisterType.FpFlag) */ + { + result = RegisterConsts.FpFlagsOffset + reg.Index; + } + + return true; + } + else if (operand.Kind == OperandKind.LocalVariable && operand.GetLocalNumber() > 0) + { + result = RegisterConsts.TotalCount + operand.GetLocalNumber() - 1; + + return true; + } + } + + result = -1; + + return false; + } + + 
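+ // Key space sketch: integer registers occupy [0, IntRegsCount), vector registers follow, then flags and FP flags; numbered locals start at RegisterConsts.TotalCount. For example, vector register V3 maps to key RegisterConsts.IntRegsCount + 3.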
private static int GetId(Operand operand) + { + if (!TryGetId(operand, out int key)) + { + Debug.Fail("OperandKind must be Register or a numbered LocalVariable."); + } + + return key; + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/SsaDeconstruction.cs b/src/ARMeilleure/Translation/SsaDeconstruction.cs new file mode 100644 index 00000000..cd6bcca1 --- /dev/null +++ b/src/ARMeilleure/Translation/SsaDeconstruction.cs @@ -0,0 +1,48 @@ +using ARMeilleure.IntermediateRepresentation; + +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; +using static ARMeilleure.IntermediateRepresentation.Operation.Factory; + +namespace ARMeilleure.Translation +{ + static partial class Ssa + { + public static void Deconstruct(ControlFlowGraph cfg) + { + for (BasicBlock block = cfg.Blocks.First; block != null; block = block.ListNext) + { + Operation operation = block.Operations.First; + + while (operation != default && operation.Instruction == Instruction.Phi) + { + Operation nextNode = operation.ListNext; + + Operand local = Local(operation.Destination.Type); + + PhiOperation phi = operation.AsPhi(); + + for (int index = 0; index < phi.SourcesCount; index++) + { + BasicBlock predecessor = phi.GetBlock(cfg, index); + + Operand source = phi.GetSource(index); + + predecessor.Append(Operation(Instruction.Copy, local, source)); + + phi.SetSource(index, default); + } + + Operation copyOp = Operation(Instruction.Copy, operation.Destination, local); + + block.Operations.AddBefore(operation, copyOp); + + operation.Destination = default; + + block.Operations.Remove(operation); + + operation = nextNode; + } + } + } + } +}
\ No newline at end of file diff --git a/src/ARMeilleure/Translation/TranslatedFunction.cs b/src/ARMeilleure/Translation/TranslatedFunction.cs new file mode 100644 index 00000000..f007883e --- /dev/null +++ b/src/ARMeilleure/Translation/TranslatedFunction.cs @@ -0,0 +1,34 @@ +using ARMeilleure.Common; +using System; + +namespace ARMeilleure.Translation +{ + class TranslatedFunction + { + private readonly GuestFunction _func; // Ensure that this delegate will not be garbage collected. + + public IntPtr FuncPointer { get; } + public Counter<uint> CallCounter { get; } + public ulong GuestSize { get; } + public bool HighCq { get; } + + public TranslatedFunction(GuestFunction func, IntPtr funcPointer, Counter<uint> callCounter, ulong guestSize, bool highCq) + { + _func = func; + FuncPointer = funcPointer; + CallCounter = callCounter; + GuestSize = guestSize; + HighCq = highCq; + } + + public ulong Execute(State.ExecutionContext context) + { + return _func(context.NativeContextPtr); + } + + public ulong Execute(WrapperFunction dispatcher, State.ExecutionContext context) + { + return dispatcher(context.NativeContextPtr, (ulong)FuncPointer); + } + } +}
\ No newline at end of file
diff --git a/src/ARMeilleure/Translation/Translator.cs b/src/ARMeilleure/Translation/Translator.cs
new file mode 100644
index 00000000..f349c5eb
--- /dev/null
+++ b/src/ARMeilleure/Translation/Translator.cs
@@ -0,0 +1,576 @@
+using ARMeilleure.CodeGen;
+using ARMeilleure.Common;
+using ARMeilleure.Decoders;
+using ARMeilleure.Diagnostics;
+using ARMeilleure.Instructions;
+using ARMeilleure.IntermediateRepresentation;
+using ARMeilleure.Memory;
+using ARMeilleure.Signal;
+using ARMeilleure.State;
+using ARMeilleure.Translation.Cache;
+using ARMeilleure.Translation.PTC;
+using Ryujinx.Common;
+using System;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using System.Threading;
+using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
+
+namespace ARMeilleure.Translation
+{
+    public class Translator
+    {
+        private static readonly AddressTable<ulong>.Level[] Levels64Bit =
+            new AddressTable<ulong>.Level[]
+            {
+                new(31, 17),
+                new(23, 8),
+                new(15, 8),
+                new( 7, 8),
+                new( 2, 5)
+            };
+
+        private static readonly AddressTable<ulong>.Level[] Levels32Bit =
+            new AddressTable<ulong>.Level[]
+            {
+                new(31, 17),
+                new(23, 8),
+                new(15, 8),
+                new( 7, 8),
+                new( 1, 6)
+            };
+
+        private readonly IJitMemoryAllocator _allocator;
+        private readonly ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>> _oldFuncs;
+
+        private readonly Ptc _ptc;
+
+        internal TranslatorCache<TranslatedFunction> Functions { get; }
+        internal AddressTable<ulong> FunctionTable { get; }
+        internal EntryTable<uint> CountTable { get; }
+        internal TranslatorStubs Stubs { get; }
+        internal TranslatorQueue Queue { get; }
+        internal IMemoryManager Memory { get; }
+
+        private volatile int _threadCount;
+
+        // FIXME: Remove this once the init logic of the emulator is redone.
+        public static readonly ManualResetEvent IsReadyForTranslation = new(false);
+
+        public Translator(IJitMemoryAllocator allocator, IMemoryManager memory, bool for64Bits)
+        {
+            _allocator = allocator;
+            Memory = memory;
+
+            _oldFuncs = new ConcurrentQueue<KeyValuePair<ulong, TranslatedFunction>>();
+
+            _ptc = new Ptc();
+
+            Queue = new TranslatorQueue();
+
+            JitCache.Initialize(allocator);
+
+            CountTable = new EntryTable<uint>();
+            Functions = new TranslatorCache<TranslatedFunction>();
+            FunctionTable = new AddressTable<ulong>(for64Bits ? Levels64Bit : Levels32Bit);
+            Stubs = new TranslatorStubs(this);
+
+            FunctionTable.Fill = (ulong)Stubs.SlowDispatchStub;
+
+            if (memory.Type.IsHostMapped())
+            {
+                NativeSignalHandler.InitializeSignalHandler(allocator.GetPageSize());
+            }
+        }
+
+        public IPtcLoadState LoadDiskCache(string titleIdText, string displayVersion, bool enabled)
+        {
+            _ptc.Initialize(titleIdText, displayVersion, enabled, Memory.Type);
+            return _ptc;
+        }
+
+        public void PrepareCodeRange(ulong address, ulong size)
+        {
+            if (_ptc.Profiler.StaticCodeSize == 0)
+            {
+                _ptc.Profiler.StaticCodeStart = address;
+                _ptc.Profiler.StaticCodeSize = size;
+            }
+        }
+
+        public void Execute(State.ExecutionContext context, ulong address)
+        {
+            if (Interlocked.Increment(ref _threadCount) == 1)
+            {
+                IsReadyForTranslation.WaitOne();
+
+                if (_ptc.State == PtcState.Enabled)
+                {
+                    Debug.Assert(Functions.Count == 0);
+                    _ptc.LoadTranslations(this);
+                    _ptc.MakeAndSaveTranslations(this);
+                }
+
+                _ptc.Profiler.Start();
+
+                _ptc.Disable();
+
+                // Simple heuristic; this should be user configurable in the future (1 for 4 cores/HT or
+                // less, 2 for 6 cores + HT, etc). All threads are normal priority except the last, which
+                // fills as much of the last core as the OS allows, at low priority. If we only have one
+                // rejit thread, it should be normal priority, as highCq code is performance critical.
+                //
+                // TODO: Use physical cores rather than logical. This only really makes sense for processors with
+                // hyperthreading. Requires OS specific code.
+                int unboundedThreadCount = Math.Max(1, (Environment.ProcessorCount - 6) / 3);
+                int threadCount = Math.Min(4, unboundedThreadCount);
+
+                for (int i = 0; i < threadCount; i++)
+                {
+                    bool last = i != 0 && i == unboundedThreadCount - 1;
+
+                    Thread backgroundTranslatorThread = new Thread(BackgroundTranslate)
+                    {
+                        Name = "CPU.BackgroundTranslatorThread." + i,
+                        Priority = last ? ThreadPriority.Lowest : ThreadPriority.Normal
+                    };
+
+                    backgroundTranslatorThread.Start();
+                }
+            }
+
+            Statistics.InitializeTimer();
+
+            NativeInterface.RegisterThread(context, Memory, this);
+
+            if (Optimizations.UseUnmanagedDispatchLoop)
+            {
+                Stubs.DispatchLoop(context.NativeContextPtr, address);
+            }
+            else
+            {
+                do
+                {
+                    address = ExecuteSingle(context, address);
+                }
+                while (context.Running && address != 0);
+            }
+
+            NativeInterface.UnregisterThread();
+
+            if (Interlocked.Decrement(ref _threadCount) == 0)
+            {
+                ClearJitCache();
+
+                Queue.Dispose();
+                Stubs.Dispose();
+                FunctionTable.Dispose();
+                CountTable.Dispose();
+
+                _ptc.Close();
+                _ptc.Profiler.Stop();
+
+                _ptc.Dispose();
+                _ptc.Profiler.Dispose();
+            }
+        }
+
+        private ulong ExecuteSingle(State.ExecutionContext context, ulong address)
+        {
+            TranslatedFunction func = GetOrTranslate(address, context.ExecutionMode);
+
+            Statistics.StartTimer();
+
+            ulong nextAddr = func.Execute(Stubs.ContextWrapper, context);
+
+            Statistics.StopTimer(address);
+
+            return nextAddr;
+        }
+
+        public ulong Step(State.ExecutionContext context, ulong address)
+        {
+            TranslatedFunction func = Translate(address, context.ExecutionMode, highCq: false, singleStep: true);
+
+            address = func.Execute(Stubs.ContextWrapper, context);
+
+            EnqueueForDeletion(address, func);
+
+            return address;
+        }
+
+        internal TranslatedFunction GetOrTranslate(ulong address, ExecutionMode mode)
+        {
+            if (!Functions.TryGetValue(address, out TranslatedFunction func))
+            {
+                func = Translate(address, mode, highCq: false);
+
+                TranslatedFunction oldFunc = Functions.GetOrAdd(address, func.GuestSize, func);
+
+                if (oldFunc != func)
+                {
+                    JitCache.Unmap(func.FuncPointer);
+                    func = oldFunc;
+                }
+
+                if (_ptc.Profiler.Enabled)
+                {
+                    _ptc.Profiler.AddEntry(address, mode, highCq: false);
+                }
+
+                RegisterFunction(address, func);
+            }
+
+            return func;
+        }
+
+        internal void RegisterFunction(ulong guestAddress, TranslatedFunction func)
+        {
+            if (FunctionTable.IsValid(guestAddress) && (Optimizations.AllowLcqInFunctionTable || func.HighCq))
+            {
+                Volatile.Write(ref FunctionTable.GetValue(guestAddress), (ulong)func.FuncPointer);
+            }
+        }
+
+        internal TranslatedFunction Translate(ulong address, ExecutionMode mode, bool highCq, bool singleStep = false)
+        {
+            var context = new ArmEmitterContext(
+                Memory,
+                CountTable,
+                FunctionTable,
+                Stubs,
+                address,
+                highCq,
+                _ptc.State != PtcState.Disabled,
+                mode: Aarch32Mode.User);
+
+            Logger.StartPass(PassName.Decoding);
+
+            Block[] blocks = Decoder.Decode(Memory, address, mode, highCq, singleStep ?
DecoderMode.SingleInstruction : DecoderMode.MultipleBlocks); + + Logger.EndPass(PassName.Decoding); + + Logger.StartPass(PassName.Translation); + + EmitSynchronization(context); + + if (blocks[0].Address != address) + { + context.Branch(context.GetLabel(address)); + } + + ControlFlowGraph cfg = EmitAndGetCFG(context, blocks, out Range funcRange, out Counter<uint> counter); + + ulong funcSize = funcRange.End - funcRange.Start; + + Logger.EndPass(PassName.Translation, cfg); + + Logger.StartPass(PassName.RegisterUsage); + + RegisterUsage.RunPass(cfg, mode); + + Logger.EndPass(PassName.RegisterUsage); + + var retType = OperandType.I64; + var argTypes = new OperandType[] { OperandType.I64 }; + + var options = highCq ? CompilerOptions.HighCq : CompilerOptions.None; + + if (context.HasPtc && !singleStep) + { + options |= CompilerOptions.Relocatable; + } + + CompiledFunction compiledFunc = Compiler.Compile(cfg, argTypes, retType, options, RuntimeInformation.ProcessArchitecture); + + if (context.HasPtc && !singleStep) + { + Hash128 hash = Ptc.ComputeHash(Memory, address, funcSize); + + _ptc.WriteCompiledFunction(address, funcSize, hash, highCq, compiledFunc); + } + + GuestFunction func = compiledFunc.MapWithPointer<GuestFunction>(out IntPtr funcPointer); + + Allocators.ResetAll(); + + return new TranslatedFunction(func, funcPointer, counter, funcSize, highCq); + } + + private void BackgroundTranslate() + { + while (_threadCount != 0 && Queue.TryDequeue(out RejitRequest request)) + { + TranslatedFunction func = Translate(request.Address, request.Mode, highCq: true); + + Functions.AddOrUpdate(request.Address, func.GuestSize, func, (key, oldFunc) => + { + EnqueueForDeletion(key, oldFunc); + return func; + }); + + if (_ptc.Profiler.Enabled) + { + _ptc.Profiler.UpdateEntry(request.Address, request.Mode, highCq: true); + } + + RegisterFunction(request.Address, func); + } + } + + private readonly struct Range + { + public ulong Start { get; } + public ulong End { get; } + + public Range(ulong start, ulong end) + { + Start = start; + End = end; + } + } + + private static ControlFlowGraph EmitAndGetCFG( + ArmEmitterContext context, + Block[] blocks, + out Range range, + out Counter<uint> counter) + { + counter = null; + + ulong rangeStart = ulong.MaxValue; + ulong rangeEnd = 0; + + for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) + { + Block block = blocks[blkIndex]; + + if (!block.Exit) + { + if (rangeStart > block.Address) + { + rangeStart = block.Address; + } + + if (rangeEnd < block.EndAddress) + { + rangeEnd = block.EndAddress; + } + } + + if (block.Address == context.EntryAddress) + { + if (!context.HighCq) + { + EmitRejitCheck(context, out counter); + } + + context.ClearQcFlag(); + } + + context.CurrBlock = block; + + context.MarkLabel(context.GetLabel(block.Address)); + + if (block.Exit) + { + // Left option here as it may be useful if we need to return to managed rather than tail call in + // future. (eg. 
for debugging)
+                    bool useReturns = false;
+
+                    InstEmitFlowHelper.EmitVirtualJump(context, Const(block.Address), isReturn: useReturns);
+                }
+                else
+                {
+                    for (int opcIndex = 0; opcIndex < block.OpCodes.Count; opcIndex++)
+                    {
+                        OpCode opCode = block.OpCodes[opcIndex];
+
+                        context.CurrOp = opCode;
+
+                        bool isLastOp = opcIndex == block.OpCodes.Count - 1;
+
+                        if (isLastOp)
+                        {
+                            context.SyncQcFlag();
+
+                            if (block.Branch != null && !block.Branch.Exit && block.Branch.Address <= block.Address)
+                            {
+                                EmitSynchronization(context);
+                            }
+                        }
+
+                        Operand lblPredicateSkip = default;
+
+                        if (context.IsInIfThenBlock && context.CurrentIfThenBlockCond != Condition.Al)
+                        {
+                            lblPredicateSkip = Label();
+
+                            InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, context.CurrentIfThenBlockCond.Invert());
+                        }
+
+                        if (opCode is OpCode32 op && op.Cond < Condition.Al)
+                        {
+                            lblPredicateSkip = Label();
+
+                            InstEmitFlowHelper.EmitCondBranch(context, lblPredicateSkip, op.Cond.Invert());
+                        }
+
+                        if (opCode.Instruction.Emitter != null)
+                        {
+                            opCode.Instruction.Emitter(context);
+                        }
+                        else
+                        {
+                            throw new InvalidOperationException($"Invalid instruction \"{opCode.Instruction.Name}\".");
+                        }
+
+                        if (lblPredicateSkip != default)
+                        {
+                            context.MarkLabel(lblPredicateSkip);
+                        }
+
+                        if (context.IsInIfThenBlock && opCode.Instruction.Name != InstName.It)
+                        {
+                            context.AdvanceIfThenBlockState();
+                        }
+                    }
+                }
+            }
+
+            range = new Range(rangeStart, rangeEnd);
+
+            return context.GetControlFlowGraph();
+        }
+
+        internal static void EmitRejitCheck(ArmEmitterContext context, out Counter<uint> counter)
+        {
+            const int MinCallsForRejit = 100;
+
+            counter = new Counter<uint>(context.CountTable);
+
+            Operand lblEnd = Label();
+
+            Operand address = !context.HasPtc ?
+                Const(ref counter.Value) :
+                Const(ref counter.Value, Ptc.CountTableSymbol);
+
+            Operand curCount = context.Load(OperandType.I32, address);
+            Operand count = context.Add(curCount, Const(1));
+            context.Store(address, count);
+            context.BranchIf(lblEnd, curCount, Const(MinCallsForRejit), Comparison.NotEqual, BasicBlockFrequency.Cold);
+
+            context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.EnqueueForRejit)), Const(context.EntryAddress));
+
+            context.MarkLabel(lblEnd);
+        }
+
+        internal static void EmitSynchronization(EmitterContext context)
+        {
+            long countOffs = NativeContext.GetCounterOffset();
+
+            Operand lblNonZero = Label();
+            Operand lblExit = Label();
+
+            Operand countAddr = context.Add(context.LoadArgument(OperandType.I64, 0), Const(countOffs));
+            Operand count = context.Load(OperandType.I32, countAddr);
+            context.BranchIfTrue(lblNonZero, count, BasicBlockFrequency.Cold);
+
+            Operand running = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.CheckSynchronization)));
+            context.BranchIfTrue(lblExit, running, BasicBlockFrequency.Cold);
+
+            context.Return(Const(0L));
+
+            context.MarkLabel(lblNonZero);
+            count = context.Subtract(count, Const(1));
+            context.Store(countAddr, count);
+
+            context.MarkLabel(lblExit);
+        }
+
+        public void InvalidateJitCacheRegion(ulong address, ulong size)
+        {
+            ulong[] overlapAddresses = Array.Empty<ulong>();
+
+            int overlapsCount = Functions.GetOverlaps(address, size, ref overlapAddresses);
+
+            if (overlapsCount != 0)
+            {
+                // If rejit is running, stop it as it may be trying to rejit a function on the invalidated region.
+                ClearRejitQueue(allowRequeue: true);
+            }
+
+            for (int index = 0; index < overlapsCount; index++)
+            {
+                ulong overlapAddress = overlapAddresses[index];
+
+                if (Functions.TryGetValue(overlapAddress, out TranslatedFunction overlap))
+                {
+                    Functions.Remove(overlapAddress);
+                    Volatile.Write(ref FunctionTable.GetValue(overlapAddress), FunctionTable.Fill);
+                    EnqueueForDeletion(overlapAddress, overlap);
+                }
+            }
+
+            // TODO: Remove overlapping functions from the JitCache as well.
+            // This should be done safely, with a mechanism to ensure the function is not being executed.
+        }
+
+        internal void EnqueueForRejit(ulong guestAddress, ExecutionMode mode)
+        {
+            Queue.Enqueue(guestAddress, mode);
+        }
+
+        private void EnqueueForDeletion(ulong guestAddress, TranslatedFunction func)
+        {
+            _oldFuncs.Enqueue(new(guestAddress, func));
+        }
+
+        private void ClearJitCache()
+        {
+            // Ensure no attempt will be made to compile new functions due to rejit.
+            ClearRejitQueue(allowRequeue: false);
+
+            List<TranslatedFunction> functions = Functions.AsList();
+
+            foreach (var func in functions)
+            {
+                JitCache.Unmap(func.FuncPointer);
+
+                func.CallCounter?.Dispose();
+            }
+
+            Functions.Clear();
+
+            while (_oldFuncs.TryDequeue(out var kv))
+            {
+                JitCache.Unmap(kv.Value.FuncPointer);
+
+                kv.Value.CallCounter?.Dispose();
+            }
+        }
+
+        private void ClearRejitQueue(bool allowRequeue)
+        {
+            if (!allowRequeue)
+            {
+                Queue.Clear();
+
+                return;
+            }
+
+            lock (Queue.Sync)
+            {
+                while (Queue.Count > 0 && Queue.TryDequeue(out RejitRequest request))
+                {
+                    if (Functions.TryGetValue(request.Address, out var func) && func.CallCounter != null)
+                    {
+                        Volatile.Write(ref func.CallCounter.Value, 0);
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/ARMeilleure/Translation/TranslatorCache.cs b/src/ARMeilleure/Translation/TranslatorCache.cs
new file mode 100644
index 00000000..11286381
--- /dev/null
+++ b/src/ARMeilleure/Translation/TranslatorCache.cs
@@ -0,0 +1,95 @@
+using System;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace ARMeilleure.Translation
+{
+    internal class TranslatorCache<T>
+    {
+        private readonly IntervalTree<ulong, T> _tree;
+        private readonly ReaderWriterLock _treeLock;
+
+        public int Count => _tree.Count;
+
+        public TranslatorCache()
+        {
+            _tree = new IntervalTree<ulong, T>();
+            _treeLock = new ReaderWriterLock();
+        }
+
+        public bool TryAdd(ulong address, ulong size, T value)
+        {
+            return AddOrUpdate(address, size, value, null);
+        }
+
+        public bool AddOrUpdate(ulong address, ulong size, T value, Func<ulong, T, T> updateFactoryCallback)
+        {
+            _treeLock.AcquireWriterLock(Timeout.Infinite);
+            bool result = _tree.AddOrUpdate(address, address + size, value, updateFactoryCallback);
+            _treeLock.ReleaseWriterLock();
+
+            return result;
+        }
+
+        public T GetOrAdd(ulong address, ulong size, T value)
+        {
+            _treeLock.AcquireWriterLock(Timeout.Infinite);
+            value = _tree.GetOrAdd(address, address + size, value);
+            _treeLock.ReleaseWriterLock();
+
+            return value;
+        }
+
+        public bool Remove(ulong address)
+        {
+            _treeLock.AcquireWriterLock(Timeout.Infinite);
+            bool removed = _tree.Remove(address) != 0;
+            _treeLock.ReleaseWriterLock();
+
+            return removed;
+        }
+
+        public void Clear()
+        {
+            _treeLock.AcquireWriterLock(Timeout.Infinite);
+            _tree.Clear();
+            _treeLock.ReleaseWriterLock();
+        }
+
+        public bool ContainsKey(ulong address)
+        {
+            _treeLock.AcquireReaderLock(Timeout.Infinite);
+            bool result = _tree.ContainsKey(address);
+            _treeLock.ReleaseReaderLock();
+
+            return result;
+        }
+
+        public bool TryGetValue(ulong address, out T value)
+        {
+            _treeLock.AcquireReaderLock(Timeout.Infinite);
+            bool result = _tree.TryGet(address, out value);
+            _treeLock.ReleaseReaderLock();
+
+            return result;
+        }
+
+        public int GetOverlaps(ulong address, ulong size, ref ulong[] overlaps)
+        {
+            _treeLock.AcquireReaderLock(Timeout.Infinite);
+            int count = _tree.Get(address, address + size, ref overlaps);
+            _treeLock.ReleaseReaderLock();
+
+            return count;
+        }
+
+        public List<T> AsList()
+        {
+            _treeLock.AcquireReaderLock(Timeout.Infinite);
+            List<T> list = _tree.AsList();
+            _treeLock.ReleaseReaderLock();
+
+            return list;
+        }
+    }
+}
diff --git a/src/ARMeilleure/Translation/TranslatorQueue.cs b/src/ARMeilleure/Translation/TranslatorQueue.cs
new file mode 100644
index 00000000..fc0aa64f
--- /dev/null
+++ b/src/ARMeilleure/Translation/TranslatorQueue.cs
@@ -0,0 +1,121 @@
+using ARMeilleure.Diagnostics;
+using ARMeilleure.State;
+using System;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace ARMeilleure.Translation
+{
+    /// <summary>
+    /// Represents a queue of <see cref="RejitRequest"/>.
+    /// </summary>
+    /// <remarks>
+    /// This does not necessarily behave like a queue, i.e. a FIFO collection.
+    /// </remarks>
+    sealed class TranslatorQueue : IDisposable
+    {
+        private bool _disposed;
+        private readonly Stack<RejitRequest> _requests;
+        private readonly HashSet<ulong> _requestAddresses;
+
+        /// <summary>
+        /// Gets the object used to synchronize access to the <see cref="TranslatorQueue"/>.
+        /// </summary>
+        public object Sync { get; }
+
+        /// <summary>
+        /// Gets the number of requests in the <see cref="TranslatorQueue"/>.
+        /// </summary>
+        public int Count => _requests.Count;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="TranslatorQueue"/> class.
+        /// </summary>
+        public TranslatorQueue()
+        {
+            Sync = new object();
+
+            _requests = new Stack<RejitRequest>();
+            _requestAddresses = new HashSet<ulong>();
+        }
+
+        /// <summary>
+        /// Enqueues a request with the specified <paramref name="address"/> and <paramref name="mode"/>.
+        /// </summary>
+        /// <param name="address">Address of request</param>
+        /// <param name="mode"><see cref="ExecutionMode"/> of request</param>
+        public void Enqueue(ulong address, ExecutionMode mode)
+        {
+            lock (Sync)
+            {
+                if (_requestAddresses.Add(address))
+                {
+                    _requests.Push(new RejitRequest(address, mode));
+
+                    TranslatorEventSource.Log.RejitQueueAdd(1);
+
+                    Monitor.Pulse(Sync);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Tries to dequeue a <see cref="RejitRequest"/>. This will block the thread until a <see cref="RejitRequest"/>
+        /// is enqueued or the <see cref="TranslatorQueue"/> is disposed.
+        /// </summary>
+        /// <param name="result"><see cref="RejitRequest"/> dequeued</param>
+        /// <returns><see langword="true"/> on success; otherwise <see langword="false"/></returns>
+        public bool TryDequeue(out RejitRequest result)
+        {
+            while (!_disposed)
+            {
+                lock (Sync)
+                {
+                    if (_requests.TryPop(out result))
+                    {
+                        _requestAddresses.Remove(result.Address);
+
+                        TranslatorEventSource.Log.RejitQueueAdd(-1);
+
+                        return true;
+                    }
+
+                    Monitor.Wait(Sync);
+                }
+            }
+
+            result = default;
+
+            return false;
+        }
+
+        /// <summary>
+        /// Clears the <see cref="TranslatorQueue"/>.
+        /// </summary>
+        public void Clear()
+        {
+            lock (Sync)
+            {
+                TranslatorEventSource.Log.RejitQueueAdd(-_requests.Count);
+
+                _requests.Clear();
+                _requestAddresses.Clear();
+
+                Monitor.PulseAll(Sync);
+            }
+        }
+
+        /// <summary>
+        /// Releases all resources used by the <see cref="TranslatorQueue"/> instance.
+ /// </summary> + public void Dispose() + { + if (!_disposed) + { + _disposed = true; + + Clear(); + } + } + } +} diff --git a/src/ARMeilleure/Translation/TranslatorStubs.cs b/src/ARMeilleure/Translation/TranslatorStubs.cs new file mode 100644 index 00000000..69648df4 --- /dev/null +++ b/src/ARMeilleure/Translation/TranslatorStubs.cs @@ -0,0 +1,312 @@ +using ARMeilleure.Instructions; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation.Cache; +using System; +using System.Reflection; +using System.Runtime.InteropServices; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Translation +{ + /// <summary> + /// Represents a stub manager. + /// </summary> + class TranslatorStubs : IDisposable + { + private static readonly Lazy<IntPtr> _slowDispatchStub = new(GenerateSlowDispatchStub, isThreadSafe: true); + + private bool _disposed; + + private readonly Translator _translator; + private readonly Lazy<IntPtr> _dispatchStub; + private readonly Lazy<DispatcherFunction> _dispatchLoop; + private readonly Lazy<WrapperFunction> _contextWrapper; + + /// <summary> + /// Gets the dispatch stub. + /// </summary> + /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception> + public IntPtr DispatchStub + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + return _dispatchStub.Value; + } + } + + /// <summary> + /// Gets the slow dispatch stub. + /// </summary> + /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception> + public IntPtr SlowDispatchStub + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + return _slowDispatchStub.Value; + } + } + + /// <summary> + /// Gets the dispatch loop function. + /// </summary> + /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception> + public DispatcherFunction DispatchLoop + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + return _dispatchLoop.Value; + } + } + + /// <summary> + /// Gets the context wrapper function. + /// </summary> + /// <exception cref="ObjectDisposedException"><see cref="TranslatorStubs"/> instance was disposed</exception> + public WrapperFunction ContextWrapper + { + get + { + ObjectDisposedException.ThrowIf(_disposed, this); + + return _contextWrapper.Value; + } + } + + /// <summary> + /// Initializes a new instance of the <see cref="TranslatorStubs"/> class with the specified + /// <see cref="Translator"/> instance. + /// </summary> + /// <param name="translator"><see cref="Translator"/> instance to use</param> + /// <exception cref="ArgumentNullException"><paramref name="translator"/> is null</exception> + public TranslatorStubs(Translator translator) + { + ArgumentNullException.ThrowIfNull(translator); + + _translator = translator; + _dispatchStub = new(GenerateDispatchStub, isThreadSafe: true); + _dispatchLoop = new(GenerateDispatchLoop, isThreadSafe: true); + _contextWrapper = new(GenerateContextWrapper, isThreadSafe: true); + } + + /// <summary> + /// Releases all resources used by the <see cref="TranslatorStubs"/> instance. + /// </summary> + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + /// <summary> + /// Releases all unmanaged and optionally managed resources used by the <see cref="TranslatorStubs"/> instance. 
+        /// </summary>
+        /// <param name="disposing"><see langword="true"/> to dispose managed resources also; otherwise just unmanaged resources</param>
+        protected virtual void Dispose(bool disposing)
+        {
+            if (!_disposed)
+            {
+                if (_dispatchStub.IsValueCreated)
+                {
+                    JitCache.Unmap(_dispatchStub.Value);
+                }
+
+                if (_dispatchLoop.IsValueCreated)
+                {
+                    JitCache.Unmap(Marshal.GetFunctionPointerForDelegate(_dispatchLoop.Value));
+                }
+
+                _disposed = true;
+            }
+        }
+
+        /// <summary>
+        /// Frees resources used by the <see cref="TranslatorStubs"/> instance.
+        /// </summary>
+        ~TranslatorStubs()
+        {
+            Dispose(false);
+        }
+
+        /// <summary>
+        /// Generates a <see cref="DispatchStub"/>.
+        /// </summary>
+        /// <returns>Generated <see cref="DispatchStub"/></returns>
+        private IntPtr GenerateDispatchStub()
+        {
+            var context = new EmitterContext();
+
+            Operand lblFallback = Label();
+            Operand lblEnd = Label();
+
+            // Load the target guest address from the native context.
+            Operand nativeContext = context.LoadArgument(OperandType.I64, 0);
+            Operand guestAddress = context.Load(OperandType.I64,
+                context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())));
+
+            // Check if guest address is within range of the AddressTable.
+            Operand masked = context.BitwiseAnd(guestAddress, Const(~_translator.FunctionTable.Mask));
+            context.BranchIfTrue(lblFallback, masked);
+
+            Operand index = default;
+            Operand page = Const((long)_translator.FunctionTable.Base);
+
+            for (int i = 0; i < _translator.FunctionTable.Levels.Length; i++)
+            {
+                ref var level = ref _translator.FunctionTable.Levels[i];
+
+                // level.Mask is not used directly because it is often wider than 32 bits, and so would not
+                // be encoded as an immediate on x86's bitwise AND operation.
+                Operand mask = Const(level.Mask >> level.Index);
+
+                index = context.BitwiseAnd(context.ShiftRightUI(guestAddress, Const(level.Index)), mask);
+
+                if (i < _translator.FunctionTable.Levels.Length - 1)
+                {
+                    page = context.Load(OperandType.I64, context.Add(page, context.ShiftLeft(index, Const(3))));
+                    context.BranchIfFalse(lblFallback, page);
+                }
+            }
+
+            Operand hostAddress;
+            Operand hostAddressAddr = context.Add(page, context.ShiftLeft(index, Const(3)));
+            hostAddress = context.Load(OperandType.I64, hostAddressAddr);
+            context.Tailcall(hostAddress, nativeContext);
+
+            context.MarkLabel(lblFallback);
+            hostAddress = context.Call(typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)), guestAddress);
+            context.Tailcall(hostAddress, nativeContext);
+
+            var cfg = context.GetControlFlowGraph();
+            var retType = OperandType.I64;
+            var argTypes = new[] { OperandType.I64 };
+
+            var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>();
+
+            return Marshal.GetFunctionPointerForDelegate(func);
+        }
+
+        /// <summary>
+        /// Generates a <see cref="SlowDispatchStub"/>.
+        /// </summary>
+        /// <returns>Generated <see cref="SlowDispatchStub"/></returns>
+        private static IntPtr GenerateSlowDispatchStub()
+        {
+            var context = new EmitterContext();
+
+            // Load the target guest address from the native context.
+ Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + Operand guestAddress = context.Load(OperandType.I64, + context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset()))); + + MethodInfo getFuncAddress = typeof(NativeInterface).GetMethod(nameof(NativeInterface.GetFunctionAddress)); + Operand hostAddress = context.Call(getFuncAddress, guestAddress); + context.Tailcall(hostAddress, nativeContext); + + var cfg = context.GetControlFlowGraph(); + var retType = OperandType.I64; + var argTypes = new[] { OperandType.I64 }; + + var func = Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<GuestFunction>(); + + return Marshal.GetFunctionPointerForDelegate(func); + } + + /// <summary> + /// Emits code that syncs FP state before executing guest code, or returns it to normal. + /// </summary> + /// <param name="context">Emitter context for the method</param> + /// <param name="nativeContext">Pointer to the native context</param> + /// <param name="enter">True if entering guest code, false otherwise</param> + private void EmitSyncFpContext(EmitterContext context, Operand nativeContext, bool enter) + { + if (enter) + { + InstEmitSimdHelper.EnterArmFpMode(context, (flag) => + { + Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag)))); + return context.Load(OperandType.I32, flagAddress); + }); + } + else + { + InstEmitSimdHelper.ExitArmFpMode(context, (flag, value) => + { + Operand flagAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRegisterOffset(new Register((int)flag, RegisterType.FpFlag)))); + context.Store(flagAddress, value); + }); + } + } + + /// <summary> + /// Generates a <see cref="DispatchLoop"/> function. + /// </summary> + /// <returns><see cref="DispatchLoop"/> function</returns> + private DispatcherFunction GenerateDispatchLoop() + { + var context = new EmitterContext(); + + Operand beginLbl = Label(); + Operand endLbl = Label(); + + Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + Operand guestAddress = context.Copy( + context.AllocateLocal(OperandType.I64), + context.LoadArgument(OperandType.I64, 1)); + + Operand runningAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetRunningOffset())); + Operand dispatchAddress = context.Add(nativeContext, Const((ulong)NativeContext.GetDispatchAddressOffset())); + + EmitSyncFpContext(context, nativeContext, true); + + context.MarkLabel(beginLbl); + context.Store(dispatchAddress, guestAddress); + context.Copy(guestAddress, context.Call(Const((ulong)DispatchStub), OperandType.I64, nativeContext)); + context.BranchIfFalse(endLbl, guestAddress); + context.BranchIfFalse(endLbl, context.Load(OperandType.I32, runningAddress)); + context.Branch(beginLbl); + + context.MarkLabel(endLbl); + + EmitSyncFpContext(context, nativeContext, false); + + context.Return(); + + var cfg = context.GetControlFlowGraph(); + var retType = OperandType.None; + var argTypes = new[] { OperandType.I64, OperandType.I64 }; + + return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<DispatcherFunction>(); + } + + /// <summary> + /// Generates a <see cref="ContextWrapper"/> function. 
+ /// </summary> + /// <returns><see cref="ContextWrapper"/> function</returns> + private WrapperFunction GenerateContextWrapper() + { + var context = new EmitterContext(); + + Operand nativeContext = context.LoadArgument(OperandType.I64, 0); + Operand guestMethod = context.LoadArgument(OperandType.I64, 1); + + EmitSyncFpContext(context, nativeContext, true); + Operand returnValue = context.Call(guestMethod, OperandType.I64, nativeContext); + EmitSyncFpContext(context, nativeContext, false); + + context.Return(returnValue); + + var cfg = context.GetControlFlowGraph(); + var retType = OperandType.I64; + var argTypes = new[] { OperandType.I64, OperandType.I64 }; + + return Compiler.Compile(cfg, argTypes, retType, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<WrapperFunction>(); + } + } +} diff --git a/src/ARMeilleure/Translation/TranslatorTestMethods.cs b/src/ARMeilleure/Translation/TranslatorTestMethods.cs new file mode 100644 index 00000000..ab96019a --- /dev/null +++ b/src/ARMeilleure/Translation/TranslatorTestMethods.cs @@ -0,0 +1,148 @@ +using ARMeilleure.CodeGen.X86; +using ARMeilleure.IntermediateRepresentation; +using ARMeilleure.State; +using ARMeilleure.Translation; +using System; +using System.Runtime.InteropServices; +using static ARMeilleure.IntermediateRepresentation.Operand.Factory; + +namespace ARMeilleure.Translation +{ + public static class TranslatorTestMethods + { + public delegate int FpFlagsPInvokeTest(IntPtr managedMethod); + + private static bool SetPlatformFtz(EmitterContext context, bool ftz) + { + if (Optimizations.UseSse2) + { + Operand mxcsr = context.AddIntrinsicInt(Intrinsic.X86Stmxcsr); + + if (ftz) + { + mxcsr = context.BitwiseOr(mxcsr, Const((int)(Mxcsr.Ftz | Mxcsr.Um | Mxcsr.Dm))); + } + else + { + mxcsr = context.BitwiseAnd(mxcsr, Const(~(int)Mxcsr.Ftz)); + } + + context.AddIntrinsicNoRet(Intrinsic.X86Ldmxcsr, mxcsr); + + return true; + } + else if (Optimizations.UseAdvSimd) + { + Operand fpcr = context.AddIntrinsicInt(Intrinsic.Arm64MrsFpcr); + + if (ftz) + { + fpcr = context.BitwiseOr(fpcr, Const((int)FPCR.Fz)); + } + else + { + fpcr = context.BitwiseAnd(fpcr, Const(~(int)FPCR.Fz)); + } + + context.AddIntrinsicNoRet(Intrinsic.Arm64MsrFpcr, fpcr); + + return true; + } + else + { + return false; + } + } + + private static Operand FpBitsToInt(EmitterContext context, Operand fp) + { + Operand vec = context.VectorInsert(context.VectorZero(), fp, 0); + return context.VectorExtract(OperandType.I32, vec, 0); + } + + public static FpFlagsPInvokeTest GenerateFpFlagsPInvokeTest() + { + EmitterContext context = new EmitterContext(); + + Operand methodAddress = context.Copy(context.LoadArgument(OperandType.I64, 0)); + + // Verify that default dotnet fp state does not flush to zero. + // This is required for SoftFloat to function. + + // Denormal + zero != 0 + + Operand denormal = ConstF(BitConverter.Int32BitsToSingle(1)); // 1.40129846432e-45 + Operand zeroF = ConstF(0f); + Operand zero = Const(0); + + Operand result = context.Add(zeroF, denormal); + + // Must not be zero. + + Operand correct1Label = Label(); + + context.BranchIfFalse(correct1Label, context.ICompareEqual(FpBitsToInt(context, result), zero)); + + context.Return(Const(1)); + + context.MarkLabel(correct1Label); + + // Set flush to zero flag. If unsupported by the backend, just return true. + + if (!SetPlatformFtz(context, true)) + { + context.Return(Const(0)); + } + + // Denormal + zero == 0 + + Operand resultFz = context.Add(zeroF, denormal); + + // Must equal zero. 
+
+            Operand correct2Label = Label();
+
+            context.BranchIfTrue(correct2Label, context.ICompareEqual(FpBitsToInt(context, resultFz), zero));
+
+            SetPlatformFtz(context, false);
+
+            context.Return(Const(2));
+
+            context.MarkLabel(correct2Label);
+
+            // Call a managed method. This method should not change Fz state.
+
+            context.Call(methodAddress, OperandType.None);
+
+            // Denormal + zero == 0
+
+            Operand resultFz2 = context.Add(zeroF, denormal);
+
+            // Must equal zero.
+
+            Operand correct3Label = Label();
+
+            context.BranchIfTrue(correct3Label, context.ICompareEqual(FpBitsToInt(context, resultFz2), zero));
+
+            SetPlatformFtz(context, false);
+
+            context.Return(Const(3));
+
+            context.MarkLabel(correct3Label);
+
+            // Success.
+
+            SetPlatformFtz(context, false);
+
+            context.Return(Const(0));
+
+            // Compile and return the function.
+
+            ControlFlowGraph cfg = context.GetControlFlowGraph();
+
+            OperandType[] argTypes = new OperandType[] { OperandType.I64 };
+
+            return Compiler.Compile(cfg, argTypes, OperandType.I32, CompilerOptions.HighCq, RuntimeInformation.ProcessArchitecture).Map<FpFlagsPInvokeTest>();
+        }
+    }
+}
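`GenerateFpFlagsPInvokeTest` above compiles to a delegate that invokes an arbitrary callback through a raw function pointer and reports which floating-point invariant broke. A hypothetical harness for it; the `FpFlagsHarness` type and the no-op callback are assumptions, and the return-code mapping is read directly off the emitted checks above:

using System;
using System.Runtime.InteropServices;
using ARMeilleure.Translation;

static class FpFlagsHarness
{
    // Stored in a static field so the GC cannot collect the delegate while
    // the compiled test still holds its raw function pointer.
    private static readonly Action _managedNoOp = static () => { };

    public static bool Run()
    {
        TranslatorTestMethods.FpFlagsPInvokeTest test =
            TranslatorTestMethods.GenerateFpFlagsPInvokeTest();

        IntPtr callback = Marshal.GetFunctionPointerForDelegate(_managedNoOp);

        // Per the emitted checks: 0 = success (or FTZ unsupported by the backend),
        // 1 = the default FP state already flushes denormals, 2 = setting FTZ had
        // no effect, 3 = the managed call-out clobbered the FTZ flag.
        int result = test(callback);

        return result == 0;
    }
}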
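As the `<remarks>` on `TranslatorQueue` above note, the type is not FIFO: it serves requests last-in-first-out and coalesces duplicate addresses, so the most recently requested code is rejitted first and never twice. A stripped-down sketch of that contract, with `ulong` standing in for `RejitRequest` and all names illustrative:

using System.Collections.Generic;

sealed class RejitQueueSketch
{
    private readonly Stack<ulong> _requests = new();
    private readonly HashSet<ulong> _addresses = new();

    public void Enqueue(ulong address)
    {
        // A second request for an address already in flight is dropped.
        if (_addresses.Add(address))
        {
            // Push rather than append: the most recent request is served first.
            _requests.Push(address);
        }
    }

    public bool TryDequeue(out ulong address)
    {
        if (_requests.TryPop(out address))
        {
            _addresses.Remove(address);
            return true;
        }

        return false;
    }
}

Enqueuing 0x1000, 0x2000 and then 0x1000 again therefore yields 0x2000 first and 0x1000 exactly once.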